add composer's vendor directory
This commit is contained in:
parent
01a3860d73
commit
60b094d5fa
745 changed files with 56017 additions and 1 deletions
34
vendor/fguillot/picofeed/lib/PicoFeed/Base.php
vendored
Normal file
34
vendor/fguillot/picofeed/lib/PicoFeed/Base.php
vendored
Normal file
|
@ -0,0 +1,34 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed;
|
||||
|
||||
use PicoFeed\Config\Config;
|
||||
use PicoFeed\Logging\Logger;
|
||||
|
||||
/**
 * Common ancestor for PicoFeed components that carry a configuration object.
 *
 * @package PicoFeed
 * @author  Frederic Guillot
 */
abstract class Base
{
    /**
     * Configuration shared with the component.
     *
     * @access protected
     * @var \PicoFeed\Config\Config
     */
    protected $config;

    /**
     * Store the configuration and hand its timezone to the logger.
     *
     * When no configuration is supplied, a new Config instance is created
     * and used instead.
     *
     * @param \PicoFeed\Config\Config $config Config class instance (optional)
     */
    public function __construct(Config $config = null)
    {
        if ($config === null) {
            $config = new Config();
        }

        $this->config = $config;

        Logger::setTimezone($config->getTimezone());
    }
}
|
673
vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php
vendored
Normal file
673
vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php
vendored
Normal file
|
@ -0,0 +1,673 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
use LogicException;
|
||||
use PicoFeed\Logging\Logger;
|
||||
use PicoFeed\Config\Config;
|
||||
|
||||
/**
 * Base HTTP client.
 *
 * Holds the request settings (conditional-request headers, proxy, basic
 * auth, limits) and interprets the response returned by the concrete
 * driver through doRequest().
 *
 * @author Frederic Guillot
 */
abstract class Client
{
    /**
     * Flag telling whether the resource has been modified.
     *
     * @var bool
     */
    private $is_modified = true;

    /**
     * HTTP Content-Type (lowercased response header value).
     *
     * @var string
     */
    private $content_type = '';

    /**
     * Charset extracted from the Content-Type header.
     *
     * @var string
     */
    private $encoding = '';

    /**
     * HTTP request headers.
     *
     * @var array
     */
    protected $request_headers = array();

    /**
     * HTTP Etag header.
     *
     * @var string
     */
    protected $etag = '';

    /**
     * HTTP Last-Modified header.
     *
     * @var string
     */
    protected $last_modified = '';

    /**
     * Proxy hostname.
     *
     * @var string
     */
    protected $proxy_hostname = '';

    /**
     * Proxy port.
     *
     * @var int
     */
    protected $proxy_port = 3128;

    /**
     * Proxy username.
     *
     * @var string
     */
    protected $proxy_username = '';

    /**
     * Proxy password.
     *
     * @var string
     */
    protected $proxy_password = '';

    /**
     * Basic auth username.
     *
     * @var string
     */
    protected $username = '';

    /**
     * Basic auth password.
     *
     * @var string
     */
    protected $password = '';

    /**
     * Client connection timeout.
     *
     * @var int
     */
    protected $timeout = 10;

    /**
     * User-agent.
     *
     * @var string
     */
    protected $user_agent = 'PicoFeed (https://github.com/fguillot/picoFeed)';

    /**
     * Real URL used (can be changed after a HTTP redirect).
     *
     * @var string
     */
    protected $url = '';

    /**
     * Page/Feed content.
     *
     * @var string
     */
    protected $content = '';

    /**
     * Maximum number of HTTP redirections, to avoid infinite loops.
     *
     * @var int
     */
    protected $max_redirects = 5;

    /**
     * Maximum size of the HTTP response body, in bytes.
     *
     * @var int
     */
    protected $max_body_size = 2097152; // 2MB

    /**
     * HTTP response status code.
     *
     * @var int
     */
    protected $status_code = 0;

    /**
     * Enables direct passthrough to the requesting client.
     *
     * @var bool
     */
    protected $passthrough = false;

    /**
     * Do the HTTP request.
     *
     * The returned array is read by this class through the keys
     * 'status', 'body' and 'headers'.
     *
     * @abstract
     *
     * @return array
     */
    abstract public function doRequest();

    /**
     * Get a client instance: curl driver when the curl extension is loaded,
     * stream driver when "allow_url_fopen" is enabled.
     *
     * @static
     *
     * @throws LogicException When neither driver can be used
     *
     * @return \PicoFeed\Client\Client
     */
    public static function getInstance()
    {
        if (function_exists('curl_init')) {
            return new Curl();
        }

        if (ini_get('allow_url_fopen')) {
            return new Stream();
        }

        throw new LogicException('You must have "allow_url_fopen=1" or curl extension installed');
    }

    /**
     * Set the extra HTTP headers sent with the request.
     *
     * The given list replaces any headers set previously.
     *
     * @param array $headers
     *
     * @return \PicoFeed\Client\Client
     */
    public function setHeaders($headers)
    {
        $this->request_headers = $headers;

        // Returned for consistency with the other fluent setters
        return $this;
    }

    /**
     * Perform the HTTP request and interpret the response.
     *
     * @param string $url URL (the URL already set on the client is kept when empty)
     *
     * @return Client
     */
    public function execute($url = '')
    {
        if ($url !== '') {
            $this->url = $url;
        }

        $caller = get_called_class();

        Logger::setMessage($caller.' Fetch URL: '.$this->url);
        Logger::setMessage($caller.' Etag provided: '.$this->etag);
        Logger::setMessage($caller.' Last-Modified provided: '.$this->last_modified);

        $response = $this->doRequest();

        $this->status_code = $response['status'];
        $this->handleNotModifiedResponse($response);
        $this->handleErrorResponse($response);
        $this->handleNormalResponse($response);

        return $this;
    }

    /**
     * Update the "modified" flag and the cache validators from the response.
     *
     * A 304 marks the resource as not modified. A 200 compares the response
     * validators with the ones provided, then stores the new ones.
     *
     * @param array $response Client response
     */
    protected function handleNotModifiedResponse(array $response)
    {
        if ($response['status'] == 304) {
            $this->is_modified = false;
        } elseif ($response['status'] == 200) {
            // Compare against the old validators before overwriting them
            $this->is_modified = $this->hasBeenModified($response, $this->etag, $this->last_modified);
            $this->etag = $this->getHeader($response, 'ETag');
            $this->last_modified = $this->getHeader($response, 'Last-Modified');
        }

        if ($this->is_modified === false) {
            Logger::setMessage(get_called_class().' Resource not modified');
        }
    }

    /**
     * Convert HTTP error status codes (401, 403, 404) into exceptions.
     *
     * @param array $response Client response
     *
     * @throws UnauthorizedException On 401
     * @throws ForbiddenException    On 403
     * @throws InvalidUrlException   On 404
     */
    protected function handleErrorResponse(array $response)
    {
        $status = $response['status'];

        if ($status == 401) {
            throw new UnauthorizedException('Wrong or missing credentials');
        } elseif ($status == 403) {
            throw new ForbiddenException('Not allowed to access resource');
        } elseif ($status == 404) {
            throw new InvalidUrlException('Resource not found');
        }
    }

    /**
     * Store body, content type and charset of a 200 response.
     *
     * @param array $response Client response
     */
    protected function handleNormalResponse(array $response)
    {
        if ($response['status'] == 200) {
            $this->content = $response['body'];
            $this->content_type = $this->findContentType($response);
            $this->encoding = $this->findCharset();
        }
    }

    /**
     * Check if a resource has been modified according to the parameters.
     *
     * @param array  $response
     * @param string $etag
     * @param string $lastModified
     *
     * @return bool
     */
    private function hasBeenModified($response, $etag, $lastModified)
    {
        $headers = array(
            'Etag' => $etag,
            'Last-Modified' => $lastModified,
        );

        // Compare the values for each header that is present
        $presentCacheHeaderCount = 0;

        foreach ($headers as $key => $value) {
            if (isset($response['headers'][$key])) {
                if ($response['headers'][$key] !== $value) {
                    return true;
                }

                ++$presentCacheHeaderCount;
            }
        }

        // If at least one header is present and the values match, the response
        // was not modified. Without any validator we must assume a change.
        return $presentCacheHeaderCount === 0;
    }

    /**
     * Find content type from response headers.
     *
     * @param array $response Client response
     *
     * @return string Lowercased Content-Type value, empty when absent
     */
    public function findContentType(array $response)
    {
        return strtolower($this->getHeader($response, 'Content-Type'));
    }

    /**
     * Find charset from the stored Content-Type value.
     *
     * Only the charset token is returned: parameters following it and
     * surrounding quotes are removed, so 'text/xml; charset="utf-8"; x=y'
     * yields 'utf-8'.
     *
     * @return string Charset, or an empty string when none is declared
     */
    public function findCharset()
    {
        $parts = explode('charset=', $this->content_type);

        if (! isset($parts[1])) {
            return '';
        }

        // Drop any parameter that follows the charset
        $tokens = explode(';', $parts[1]);

        return trim($tokens[0], " \t\"'");
    }

    /**
     * Get header value from a client response.
     *
     * @param array  $response Client response
     * @param string $header   Header name
     *
     * @return string Header value, empty when the header is absent
     */
    public function getHeader(array $response, $header)
    {
        return isset($response['headers'][$header]) ? $response['headers'][$header] : '';
    }

    /**
     * Set the Last-Modified HTTP header.
     *
     * @param string $last_modified Header value
     *
     * @return \PicoFeed\Client\Client
     */
    public function setLastModified($last_modified)
    {
        $this->last_modified = $last_modified;

        return $this;
    }

    /**
     * Get the value of the Last-Modified HTTP header.
     *
     * @return string
     */
    public function getLastModified()
    {
        return $this->last_modified;
    }

    /**
     * Set the value of the Etag HTTP header.
     *
     * @param string $etag Etag HTTP header value
     *
     * @return \PicoFeed\Client\Client
     */
    public function setEtag($etag)
    {
        $this->etag = $etag;

        return $this;
    }

    /**
     * Get the Etag HTTP header value.
     *
     * @return string
     */
    public function getEtag()
    {
        return $this->etag;
    }

    /**
     * Get the final url value.
     *
     * @return string
     */
    public function getUrl()
    {
        return $this->url;
    }

    /**
     * Set the url.
     *
     * @param string $url URL
     *
     * @return \PicoFeed\Client\Client
     */
    public function setUrl($url)
    {
        $this->url = $url;

        return $this;
    }

    /**
     * Get the HTTP response status code.
     *
     * @return int
     */
    public function getStatusCode()
    {
        return $this->status_code;
    }

    /**
     * Get the body of the HTTP response.
     *
     * @return string
     */
    public function getContent()
    {
        return $this->content;
    }

    /**
     * Get the content type value from HTTP headers.
     *
     * @return string
     */
    public function getContentType()
    {
        return $this->content_type;
    }

    /**
     * Get the encoding value from HTTP headers.
     *
     * @return string
     */
    public function getEncoding()
    {
        return $this->encoding;
    }

    /**
     * Return true if the remote resource has changed.
     *
     * @return bool
     */
    public function isModified()
    {
        return $this->is_modified;
    }

    /**
     * Return true if passthrough mode is enabled.
     *
     * @return bool
     */
    public function isPassthroughEnabled()
    {
        return $this->passthrough;
    }

    /**
     * Set connection timeout (an empty value keeps the current one).
     *
     * @param int $timeout Connection timeout
     *
     * @return \PicoFeed\Client\Client
     */
    public function setTimeout($timeout)
    {
        $this->timeout = $timeout ?: $this->timeout;

        return $this;
    }

    /**
     * Set a custom user agent (an empty value keeps the current one).
     *
     * @param string $user_agent User Agent
     *
     * @return \PicoFeed\Client\Client
     */
    public function setUserAgent($user_agent)
    {
        $this->user_agent = $user_agent ?: $this->user_agent;

        return $this;
    }

    /**
     * Set the maximum number of HTTP redirections (an empty value keeps the current one).
     *
     * @param int $max Maximum
     *
     * @return \PicoFeed\Client\Client
     */
    public function setMaxRedirections($max)
    {
        $this->max_redirects = $max ?: $this->max_redirects;

        return $this;
    }

    /**
     * Set the maximum size of the HTTP body (an empty value keeps the current one).
     *
     * @param int $max Maximum
     *
     * @return \PicoFeed\Client\Client
     */
    public function setMaxBodySize($max)
    {
        $this->max_body_size = $max ?: $this->max_body_size;

        return $this;
    }

    /**
     * Set the proxy hostname (an empty value keeps the current one).
     *
     * @param string $hostname Proxy hostname
     *
     * @return \PicoFeed\Client\Client
     */
    public function setProxyHostname($hostname)
    {
        $this->proxy_hostname = $hostname ?: $this->proxy_hostname;

        return $this;
    }

    /**
     * Set the proxy port (an empty value keeps the current one).
     *
     * @param int $port Proxy port
     *
     * @return \PicoFeed\Client\Client
     */
    public function setProxyPort($port)
    {
        $this->proxy_port = $port ?: $this->proxy_port;

        return $this;
    }

    /**
     * Set the proxy username (an empty value keeps the current one).
     *
     * @param string $username Proxy username
     *
     * @return \PicoFeed\Client\Client
     */
    public function setProxyUsername($username)
    {
        $this->proxy_username = $username ?: $this->proxy_username;

        return $this;
    }

    /**
     * Set the proxy password (an empty value keeps the current one).
     *
     * @param string $password Password
     *
     * @return \PicoFeed\Client\Client
     */
    public function setProxyPassword($password)
    {
        $this->proxy_password = $password ?: $this->proxy_password;

        return $this;
    }

    /**
     * Set the username (an empty value keeps the current one).
     *
     * @param string $username Basic Auth username
     *
     * @return \PicoFeed\Client\Client
     */
    public function setUsername($username)
    {
        $this->username = $username ?: $this->username;

        return $this;
    }

    /**
     * Set the password (an empty value keeps the current one).
     *
     * @param string $password Basic Auth Password
     *
     * @return \PicoFeed\Client\Client
     */
    public function setPassword($password)
    {
        $this->password = $password ?: $this->password;

        return $this;
    }

    /**
     * Enable the passthrough mode.
     *
     * @return \PicoFeed\Client\Client
     */
    public function enablePassthroughMode()
    {
        $this->passthrough = true;

        return $this;
    }

    /**
     * Disable the passthrough mode.
     *
     * @return \PicoFeed\Client\Client
     */
    public function disablePassthroughMode()
    {
        $this->passthrough = false;

        return $this;
    }

    /**
     * Copy the client related settings from a config object.
     *
     * Each value goes through the matching setter, so empty config values
     * leave the current setting untouched.
     *
     * @param \PicoFeed\Config\Config $config Config instance
     *
     * @return \PicoFeed\Client\Client
     */
    public function setConfig(Config $config)
    {
        // The type hint already rejects null, no extra check is needed
        $this->setTimeout($config->getClientTimeout());
        $this->setUserAgent($config->getClientUserAgent());
        $this->setMaxRedirections($config->getMaxRedirections());
        $this->setMaxBodySize($config->getMaxBodySize());
        $this->setProxyHostname($config->getProxyHostname());
        $this->setProxyPort($config->getProxyPort());
        $this->setProxyUsername($config->getProxyUsername());
        $this->setProxyPassword($config->getProxyPassword());

        return $this;
    }

    /**
     * Return true if the HTTP status code is a redirection
     * (301, 302, 303, 307 or 308).
     *
     * @param int $code
     *
     * @return bool
     */
    public function isRedirection($code)
    {
        return $code == 301 || $code == 302 || $code == 303 || $code == 307 || $code == 308;
    }
}
|
14
vendor/fguillot/picofeed/lib/PicoFeed/Client/ClientException.php
vendored
Normal file
14
vendor/fguillot/picofeed/lib/PicoFeed/Client/ClientException.php
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
use PicoFeed\PicoFeedException;
|
||||
|
||||
/**
 * Parent type of the exceptions raised by the HTTP client.
 *
 * @author Frederic Guillot
 */
abstract class ClientException extends PicoFeedException
{
}
|
386
vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php
vendored
Normal file
386
vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php
vendored
Normal file
|
@ -0,0 +1,386 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
use PicoFeed\Logging\Logger;
|
||||
|
||||
/**
 * cURL HTTP client.
 *
 * Redirections are followed by this class (CURLOPT_FOLLOWLOCATION is
 * disabled) so that the configured maximum can be enforced.
 *
 * @author Frederic Guillot
 */
class Curl extends Client
{
    /**
     * HTTP response body.
     *
     * @var string
     */
    private $body = '';

    /**
     * Body size.
     *
     * @var int
     */
    private $body_length = 0;

    /**
     * HTTP response headers, one raw header block per entry.
     *
     * @var array
     */
    private $response_headers = array();

    /**
     * Number of complete header blocks received.
     *
     * @var int
     */
    private $response_headers_count = 0;

    /**
     * cURL callback to read the HTTP body.
     *
     * If the function returns -1, curl stops reading the HTTP response.
     *
     * @param resource $ch     cURL handler
     * @param string   $buffer Chunk of data
     *
     * @return int Length of the buffer
     */
    public function readBody($ch, $buffer)
    {
        $length = strlen($buffer);
        $this->body_length += $length;

        if ($this->body_length > $this->max_body_size) {
            return -1;
        }

        $this->body .= $buffer;

        return $length;
    }

    /**
     * cURL callback to read HTTP headers.
     *
     * Header lines are appended to the current block; an empty line closes
     * the block and starts the next one.
     *
     * @param resource $ch     cURL handler
     * @param string   $buffer Header line
     *
     * @return int Length of the buffer
     */
    public function readHeaders($ch, $buffer)
    {
        $length = strlen($buffer);

        if ($buffer === "\r\n" || $buffer === "\n") {
            ++$this->response_headers_count;
        } else {
            if (! isset($this->response_headers[$this->response_headers_count])) {
                $this->response_headers[$this->response_headers_count] = '';
            }

            $this->response_headers[$this->response_headers_count] .= $buffer;
        }

        return $length;
    }

    /**
     * cURL callback to passthrough the HTTP body to the client.
     *
     * If the function returns -1, curl stops reading the HTTP response.
     *
     * @param resource $ch     cURL handler
     * @param string   $buffer Chunk of data
     *
     * @return int Length of the buffer
     */
    public function passthroughBody($ch, $buffer)
    {
        // do it only at the beginning of a transmission
        if ($this->body_length === 0) {
            list($status, $headers) = HttpHeaders::parse(explode("\n", $this->response_headers[$this->response_headers_count - 1]));

            // NOTE(review): the redirection result is an array, not a byte
            // count; kept as in the original implementation
            if ($this->isRedirection($status) && isset($headers['Location'])) {
                return $this->handleRedirection($headers['Location']);
            }

            header(':', true, $status);

            if (isset($headers['Content-Type'])) {
                header('Content-Type:' .$headers['Content-Type']);
            }
        }

        $length = strlen($buffer);
        $this->body_length += $length;

        echo $buffer;

        return $length;
    }

    /**
     * Prepare HTTP headers.
     *
     * Conditional-request headers are added when an Etag or a Last-Modified
     * value is set, followed by the custom request headers.
     *
     * @return string[]
     */
    private function prepareHeaders()
    {
        $headers = array(
            'Connection: close',
        );

        if ($this->etag) {
            $headers[] = 'If-None-Match: '.$this->etag;
        }

        if ($this->last_modified) {
            $headers[] = 'If-Modified-Since: '.$this->last_modified;
        }

        return array_merge($headers, $this->request_headers);
    }

    /**
     * Prepare curl proxy context.
     *
     * @param resource $ch
     *
     * @return resource $ch
     */
    private function prepareProxyContext($ch)
    {
        if ($this->proxy_hostname) {
            Logger::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port);

            curl_setopt($ch, CURLOPT_PROXYPORT, $this->proxy_port);
            // CURLOPT_PROXYTYPE expects one of the CURLPROXY_* constants
            curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
            curl_setopt($ch, CURLOPT_PROXY, $this->proxy_hostname);

            if ($this->proxy_username) {
                Logger::setMessage(get_called_class().' Proxy credentials: Yes');
                curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->proxy_username.':'.$this->proxy_password);
            } else {
                Logger::setMessage(get_called_class().' Proxy credentials: No');
            }
        }

        return $ch;
    }

    /**
     * Prepare curl auth context.
     *
     * Credentials are sent only when both username and password are set.
     *
     * @param resource $ch
     *
     * @return resource $ch
     */
    private function prepareAuthContext($ch)
    {
        if ($this->username && $this->password) {
            curl_setopt($ch, CURLOPT_USERPWD, $this->username.':'.$this->password);
        }

        return $ch;
    }

    /**
     * Set write/header functions.
     *
     * The body goes to passthroughBody() in passthrough mode and to
     * readBody() otherwise.
     *
     * @param resource $ch
     *
     * @return resource $ch
     */
    private function prepareDownloadMode($ch)
    {
        $write_function = $this->isPassthroughEnabled() ? 'passthroughBody' : 'readBody';

        curl_setopt($ch, CURLOPT_WRITEFUNCTION, array($this, $write_function));
        curl_setopt($ch, CURLOPT_HEADERFUNCTION, array($this, 'readHeaders'));

        return $ch;
    }

    /**
     * Prepare curl context.
     *
     * @return resource
     */
    private function prepareContext()
    {
        $ch = curl_init();

        curl_setopt($ch, CURLOPT_URL, $this->url);
        curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $this->timeout);
        curl_setopt($ch, CURLOPT_USERAGENT, $this->user_agent);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $this->prepareHeaders());
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
        curl_setopt($ch, CURLOPT_ENCODING, '');
        curl_setopt($ch, CURLOPT_COOKIEJAR, 'php://memory');
        curl_setopt($ch, CURLOPT_COOKIEFILE, 'php://memory');

        // Disable SSLv3 by enforcing TLSv1.x for curl >= 7.34.0 and < 7.39.0.
        // Versions prior to 7.34 and at least when compiled against openssl
        // interpret this parameter as "limit to TLSv1.0" which fails for sites
        // which enforce TLS 1.1+.
        // Starting with curl 7.39.0 SSLv3 is disabled by default.
        $version = curl_version();

        if ($version['version_number'] >= 467456 && $version['version_number'] < 468736) {
            curl_setopt($ch, CURLOPT_SSLVERSION, 1);
        }

        $ch = $this->prepareDownloadMode($ch);
        $ch = $this->prepareProxyContext($ch);
        $ch = $this->prepareAuthContext($ch);

        return $ch;
    }

    /**
     * Execute curl context.
     *
     * Logs the transfer timings, raises an exception on cURL errors and
     * stores the effective URL.
     */
    private function executeContext()
    {
        $ch = $this->prepareContext();
        curl_exec($ch);

        Logger::setMessage(get_called_class().' cURL total time: '.curl_getinfo($ch, CURLINFO_TOTAL_TIME));
        Logger::setMessage(get_called_class().' cURL dns lookup time: '.curl_getinfo($ch, CURLINFO_NAMELOOKUP_TIME));
        Logger::setMessage(get_called_class().' cURL connect time: '.curl_getinfo($ch, CURLINFO_CONNECT_TIME));
        Logger::setMessage(get_called_class().' cURL speed download: '.curl_getinfo($ch, CURLINFO_SPEED_DOWNLOAD));
        Logger::setMessage(get_called_class().' cURL effective url: '.curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));

        $curl_errno = curl_errno($ch);

        if ($curl_errno) {
            Logger::setMessage(get_called_class().' cURL error: '.curl_error($ch));
            curl_close($ch);

            // handleError() always throws, nothing below runs in that case
            $this->handleError($curl_errno);
        }

        // Update the url if there were redirects
        $this->url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);

        curl_close($ch);
    }

    /**
     * Do the HTTP request.
     *
     * @param bool $follow_location When false, a redirection response is
     *                              returned as-is instead of being followed
     *                              (used internally to count redirections)
     *
     * @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...]
     */
    public function doRequest($follow_location = true)
    {
        $this->executeContext();

        list($status, $headers) = HttpHeaders::parse(explode("\n", $this->response_headers[$this->response_headers_count - 1]));

        // A redirection without Location cannot be followed: return it as-is
        if ($follow_location && $this->isRedirection($status) && isset($headers['Location'])) {
            return $this->handleRedirection($headers['Location']);
        }

        return array(
            'status' => $status,
            'body' => $this->body,
            'headers' => $headers,
        );
    }

    /**
     * Handle HTTP redirects
     *
     * Follows the chain of redirections one request at a time so that a
     * single counter covers the whole chain.
     *
     * @param string $location Redirected URL
     *
     * @throws MaxRedirectException When the maximum number of redirections is reached
     *
     * @return array
     */
    private function handleRedirection($location)
    {
        $nb_redirects = 0;
        $this->prepareNextHop($location);

        while (true) {
            ++$nb_redirects;

            if ($nb_redirects >= $this->max_redirects) {
                throw new MaxRedirectException('Maximum number of redirections reached');
            }

            // Do not let doRequest() follow redirections by itself: the
            // nested call would restart the counter at every hop
            $result = $this->doRequest(false);

            if (! $this->isRedirection($result['status']) || ! isset($result['headers']['Location'])) {
                return $result;
            }

            $this->prepareNextHop($result['headers']['Location']);
        }
    }

    /**
     * Point the client to the redirected URL and clear the response state.
     *
     * @param string $location Redirected URL (resolved against the current one)
     */
    private function prepareNextHop($location)
    {
        $this->url = Url::resolve($location, $this->url);
        $this->body = '';
        $this->body_length = 0;
        $this->response_headers = array();
        $this->response_headers_count = 0;
    }

    /**
     * Handle cURL errors (throw individual exceptions).
     *
     * We don't use constants because they are not necessarily always available
     * (depends on the version of libcurl linked to php)
     *
     * @see http://curl.haxx.se/libcurl/c/libcurl-errors.html
     *
     * @param int $errno cURL error code
     *
     * @throws InvalidUrlException
     * @throws MaxSizeException
     * @throws TimeoutException
     * @throws InvalidCertificateException
     * @throws MaxRedirectException
     */
    private function handleError($errno)
    {
        switch ($errno) {
            case 78: // CURLE_REMOTE_FILE_NOT_FOUND
                throw new InvalidUrlException('Resource not found', $errno);
            case 6: // CURLE_COULDNT_RESOLVE_HOST
                throw new InvalidUrlException('Unable to resolve hostname', $errno);
            case 7: // CURLE_COULDNT_CONNECT
                throw new InvalidUrlException('Unable to connect to the remote host', $errno);
            case 23: // CURLE_WRITE_ERROR
                throw new MaxSizeException('Maximum response size exceeded', $errno);
            case 28: // CURLE_OPERATION_TIMEDOUT
                throw new TimeoutException('Operation timeout', $errno);
            case 35: // CURLE_SSL_CONNECT_ERROR
            case 51: // CURLE_PEER_FAILED_VERIFICATION
            case 58: // CURLE_SSL_CERTPROBLEM
            case 60: // CURLE_SSL_CACERT
            case 59: // CURLE_SSL_CIPHER
            case 64: // CURLE_USE_SSL_FAILED
            case 66: // CURLE_SSL_ENGINE_INITFAILED
            case 77: // CURLE_SSL_CACERT_BADFILE
            case 83: // CURLE_SSL_ISSUER_ERROR
                $msg = 'Invalid SSL certificate caused by CURL error number ' .
                    $errno;
                throw new InvalidCertificateException($msg, $errno);
            case 47: // CURLE_TOO_MANY_REDIRECTS
                throw new MaxRedirectException('Maximum number of redirections reached', $errno);
            case 63: // CURLE_FILESIZE_EXCEEDED
                throw new MaxSizeException('Maximum response size exceeded', $errno);
            default:
                throw new InvalidUrlException('Unable to fetch the URL', $errno);
        }
    }
}
|
10
vendor/fguillot/picofeed/lib/PicoFeed/Client/ForbiddenException.php
vendored
Normal file
10
vendor/fguillot/picofeed/lib/PicoFeed/Client/ForbiddenException.php
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
/**
 * ForbiddenException Exception.
 *
 * Raised by the HTTP client when the server answers with a 403 status code.
 *
 * @author Bernhard Posselt
 */
class ForbiddenException extends ClientException
{
}
|
79
vendor/fguillot/picofeed/lib/PicoFeed/Client/HttpHeaders.php
vendored
Normal file
79
vendor/fguillot/picofeed/lib/PicoFeed/Client/HttpHeaders.php
vendored
Normal file
|
@ -0,0 +1,79 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
use ArrayAccess;
|
||||
use PicoFeed\Logging\Logger;
|
||||
|
||||
/**
 * Class to handle HTTP headers case insensitivity.
 *
 * Header names are stored lowercased, so array access works with any
 * casing of the name.
 *
 * @author Bernhard Posselt
 * @author Frederic Guillot
 */
class HttpHeaders implements ArrayAccess
{
    /**
     * Header values indexed by lowercased header name.
     *
     * @var array
     */
    private $headers = array();

    /**
     * @param array $headers Header values indexed by header name
     */
    public function __construct(array $headers)
    {
        foreach ($headers as $key => $value) {
            $this->headers[strtolower($key)] = $value;
        }
    }

    /**
     * Get a header value.
     *
     * @param string $offset Header name (any casing)
     *
     * @return mixed Header value, or null when the header is not set
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset)
    {
        $key = strtolower($offset);

        // Avoid an "undefined index" error for absent headers
        return isset($this->headers[$key]) ? $this->headers[$key] : null;
    }

    /**
     * Set a header value.
     *
     * @param string $offset Header name (any casing)
     * @param mixed  $value  Header value
     */
    #[\ReturnTypeWillChange]
    public function offsetSet($offset, $value)
    {
        $this->headers[strtolower($offset)] = $value;
    }

    /**
     * Tell whether a header is set.
     *
     * @param string $offset Header name (any casing)
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function offsetExists($offset)
    {
        return isset($this->headers[strtolower($offset)]);
    }

    /**
     * Remove a header.
     *
     * @param string $offset Header name (any casing)
     */
    #[\ReturnTypeWillChange]
    public function offsetUnset($offset)
    {
        unset($this->headers[strtolower($offset)]);
    }

    /**
     * Parse HTTP headers.
     *
     * Each status line resets the collected headers, so only the headers
     * following the last status line are returned.
     *
     * @static
     *
     * @param array $lines List of headers
     *
     * @return array Status code (0 when no status line was found) and HttpHeaders instance
     */
    public static function parse(array $lines)
    {
        $status = 0;
        $headers = array();

        foreach ($lines as $line) {
            if (preg_match('#^HTTP/\d+(?:\.\d+)?\s+(\d{3})#', $line, $matches)) {
                $headers = array();
                $status = (int) $matches[1];
            } elseif (strpos($line, ': ') !== false) {
                // Split on the first separator only: the value itself may contain ': '
                list($name, $value) = explode(': ', $line, 2);

                if ($value) {
                    $headers[trim($name)] = trim($value);
                }
            }
        }

        Logger::setMessage(get_called_class().' HTTP status code: '.$status);

        foreach ($headers as $name => $value) {
            Logger::setMessage(get_called_class().' HTTP header: '.$name.' => '.$value);
        }

        return array($status, new self($headers));
    }
}
|
12
vendor/fguillot/picofeed/lib/PicoFeed/Client/InvalidCertificateException.php
vendored
Normal file
12
vendor/fguillot/picofeed/lib/PicoFeed/Client/InvalidCertificateException.php
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
/**
 * InvalidCertificateException Exception.
 *
 * Client exception with no behaviour of its own; it only provides a
 * distinct type to catch.
 * NOTE(review): presumably raised when the TLS certificate of the remote
 * host cannot be validated - confirm against the HTTP client implementations.
 *
 * @author Frederic Guillot
 */
class InvalidCertificateException extends ClientException
{
}
|
12
vendor/fguillot/picofeed/lib/PicoFeed/Client/InvalidUrlException.php
vendored
Normal file
12
vendor/fguillot/picofeed/lib/PicoFeed/Client/InvalidUrlException.php
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
/**
 * InvalidUrlException Exception.
 *
 * Client exception with no behaviour of its own; it only provides a
 * distinct type to catch. Stream::doRequest() throws it when the
 * connection to the URL cannot be established.
 *
 * @author Frederic Guillot
 */
class InvalidUrlException extends ClientException
{
}
|
12
vendor/fguillot/picofeed/lib/PicoFeed/Client/MaxRedirectException.php
vendored
Normal file
12
vendor/fguillot/picofeed/lib/PicoFeed/Client/MaxRedirectException.php
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
/**
 * MaxRedirectException Exception.
 *
 * Client exception with no behaviour of its own; it only provides a
 * distinct type to catch.
 * NOTE(review): presumably raised when the maximum number of HTTP
 * redirections is exceeded - confirm against the HTTP client implementations.
 *
 * @author Frederic Guillot
 */
class MaxRedirectException extends ClientException
{
}
|
12
vendor/fguillot/picofeed/lib/PicoFeed/Client/MaxSizeException.php
vendored
Normal file
12
vendor/fguillot/picofeed/lib/PicoFeed/Client/MaxSizeException.php
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
/**
 * MaxSizeException Exception.
 *
 * Client exception with no behaviour of its own; it only provides a
 * distinct type to catch. Stream::doRequest() throws it when the response
 * body is larger than the configured maximum body size.
 *
 * @author Frederic Guillot
 */
class MaxSizeException extends ClientException
{
}
|
201
vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php
vendored
Normal file
201
vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php
vendored
Normal file
|
@ -0,0 +1,201 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
use PicoFeed\Logging\Logger;
|
||||
|
||||
/**
|
||||
* Stream context HTTP client.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class Stream extends Client
{
    /**
     * Prepare HTTP headers.
     *
     * Builds the request header lines from the client settings
     * (user agent, cache validators, proxy and basic authentication)
     * and appends the custom request headers.
     *
     * @return string[]
     */
    private function prepareHeaders()
    {
        $headers = array(
            'Connection: close',
            'User-Agent: '.$this->user_agent,
        );

        // disable compression in passthrough mode. It could result in double
        // compressed content which isn't decodeable by browsers
        if (function_exists('gzdecode') && !$this->isPassthroughEnabled()) {
            $headers[] = 'Accept-Encoding: gzip';
        }

        if ($this->etag) {
            $headers[] = 'If-None-Match: '.$this->etag;
        }

        if ($this->last_modified) {
            $headers[] = 'If-Modified-Since: '.$this->last_modified;
        }

        if ($this->proxy_username) {
            $headers[] = 'Proxy-Authorization: Basic '.base64_encode($this->proxy_username.':'.$this->proxy_password);
        }

        if ($this->username && $this->password) {
            $headers[] = 'Authorization: Basic '.base64_encode($this->username.':'.$this->password);
        }

        $headers = array_merge($headers, $this->request_headers);

        return $headers;
    }

    /**
     * Construct the final URL from location headers.
     *
     * Each Location header found is resolved against the current URL,
     * in the order the headers were received.
     *
     * @param array $headers List of HTTP response header
     */
    private function setEffectiveUrl($headers)
    {
        foreach ($headers as $header) {
            if (stripos($header, 'Location:') === 0) {
                // Take everything after the first colon: the URL itself may
                // contain ": " and the space after the colon is optional
                $value = trim(substr($header, strlen('Location:')));

                if ($value !== '') {
                    $this->url = Url::resolve($value, $this->url);
                }
            }
        }
    }

    /**
     * Prepare stream context.
     *
     * @return array Options suitable for stream_context_create()
     */
    private function prepareContext()
    {
        $context = array(
            'http' => array(
                'method' => 'GET',
                'protocol_version' => 1.1,
                'timeout' => $this->timeout,
                'max_redirects' => $this->max_redirects,
            ),
        );

        if ($this->proxy_hostname) {
            Logger::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port);

            $context['http']['proxy'] = 'tcp://'.$this->proxy_hostname.':'.$this->proxy_port;
            $context['http']['request_fulluri'] = true;

            if ($this->proxy_username) {
                Logger::setMessage(get_called_class().' Proxy credentials: Yes');
            } else {
                Logger::setMessage(get_called_class().' Proxy credentials: No');
            }
        }

        $context['http']['header'] = implode("\r\n", $this->prepareHeaders());

        return $context;
    }

    /**
     * Do the HTTP request.
     *
     * In passthrough mode the response is sent directly to the output and
     * the returned body is empty.
     *
     * @throws InvalidUrlException When the connection cannot be established
     * @throws MaxSizeException    When the body is larger than the maximum body size
     * @throws TimeoutException    When the stream timed out while reading the body
     *
     * @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...]
     */
    public function doRequest()
    {
        $body = '';
        $timed_out = false;

        // Create context
        $context = stream_context_create($this->prepareContext());

        // Make HTTP request
        $stream = @fopen($this->url, 'r', false, $context);
        if (!is_resource($stream)) {
            throw new InvalidUrlException('Unable to establish a connection');
        }

        // Get HTTP headers response
        $metadata = stream_get_meta_data($stream);
        list($status, $headers) = HttpHeaders::parse($metadata['wrapper_data']);

        if ($this->isPassthroughEnabled()) {
            header(':', true, $status);

            if (isset($headers['Content-Type'])) {
                header('Content-Type: '.$headers['Content-Type']);
            }

            fpassthru($stream);
        } else {
            // Get the entire body until the max size
            $body = stream_get_contents($stream, $this->max_body_size + 1);

            if ($body === false) {
                $body = '';
            }

            // The metadata fetched above is a snapshot taken before the body
            // was read; the timeout flag must be read again after the read
            $read_metadata = stream_get_meta_data($stream);
            $timed_out = !empty($read_metadata['timed_out']);
        }

        // Always release the handle, including when an exception follows
        fclose($stream);

        // If the body size is too large abort everything
        if (strlen($body) > $this->max_body_size) {
            throw new MaxSizeException('Content size too large');
        }

        if ($timed_out) {
            throw new TimeoutException('Operation timeout');
        }

        $this->setEffectiveUrl($metadata['wrapper_data']);

        return array(
            'status' => $status,
            'body' => $this->decodeBody($body, $headers),
            'headers' => $headers,
        );
    }

    /**
     * Decode body response according to the HTTP headers.
     *
     * Chunked transfer encoding is decoded first, then gzip content encoding.
     *
     * @param string      $body    Raw body
     * @param HttpHeaders $headers HTTP headers
     *
     * @return string
     */
    public function decodeBody($body, HttpHeaders $headers)
    {
        if (isset($headers['Transfer-Encoding']) && $headers['Transfer-Encoding'] === 'chunked') {
            $body = $this->decodeChunked($body);
        }

        if (isset($headers['Content-Encoding']) && $headers['Content-Encoding'] === 'gzip') {
            $body = gzdecode($body);
        }

        return $body;
    }

    /**
     * Decode a chunked body.
     *
     * @param string $str Raw body
     *
     * @return string Decoded body
     */
    public function decodeChunked($str)
    {
        for ($result = ''; !empty($str); $str = trim($str)) {

            // Get the chunk length
            $pos = strpos($str, "\r\n");

            // No chunk-size line left: nothing more can be decoded
            if ($pos === false) {
                break;
            }

            $len = hexdec(substr($str, 0, $pos));

            // Append the chunk to the result
            $result .= substr($str, $pos + 2, $len);
            $str = substr($str, $pos + 2 + $len);
        }

        return $result;
    }
}
|
12
vendor/fguillot/picofeed/lib/PicoFeed/Client/TimeoutException.php
vendored
Normal file
12
vendor/fguillot/picofeed/lib/PicoFeed/Client/TimeoutException.php
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
/**
 * TimeoutException Exception.
 *
 * Client exception with no behaviour of its own; it only provides a
 * distinct type to catch. Stream::doRequest() throws it when the stream
 * metadata reports a timeout.
 *
 * @author Frederic Guillot
 */
class TimeoutException extends ClientException
{
}
|
10
vendor/fguillot/picofeed/lib/PicoFeed/Client/UnauthorizedException.php
vendored
Normal file
10
vendor/fguillot/picofeed/lib/PicoFeed/Client/UnauthorizedException.php
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
/**
 * UnauthorizedException Exception.
 *
 * Client exception with no behaviour of its own; it only provides a
 * distinct type to catch.
 * NOTE(review): presumably raised when the server rejects the request
 * credentials (HTTP 401) - confirm against the HTTP client implementations.
 *
 * @author Bernhard Posselt
 */
class UnauthorizedException extends ClientException
{
}
|
290
vendor/fguillot/picofeed/lib/PicoFeed/Client/Url.php
vendored
Normal file
290
vendor/fguillot/picofeed/lib/PicoFeed/Client/Url.php
vendored
Normal file
|
@ -0,0 +1,290 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
/**
|
||||
* URL class.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class Url
{
    /**
     * URL as given to the constructor.
     *
     * @var string
     */
    private $url = '';

    /**
     * URL components as returned by parse_url().
     *
     * @var array
     */
    private $components = array();

    /**
     * Constructor.
     *
     * @param string $url URL
     */
    public function __construct($url)
    {
        $this->url = $url;
        $this->components = parse_url($url) ?: array();

        // Issue with PHP < 5.4.7 and protocol relative url:
        // the host ends up in the path and has to be extracted by hand
        if (version_compare(PHP_VERSION, '5.4.7', '<') && $this->isProtocolRelative()) {
            $pos = strpos($this->components['path'], '/', 2);

            if ($pos === false) {
                $pos = strlen($this->components['path']);
            }

            $this->components['host'] = substr($this->components['path'], 2, $pos - 2);
            $this->components['path'] = substr($this->components['path'], $pos);
        }
    }

    /**
     * Shortcut method to get an absolute url from relative url.
     *
     * @static
     *
     * @param mixed $item_url    Unknown url (can be relative or not), string or Url instance
     * @param mixed $website_url Website url, string or Url instance
     *
     * @return string
     */
    public static function resolve($item_url, $website_url)
    {
        $link = is_string($item_url) ? new self($item_url) : $item_url;
        $website = is_string($website_url) ? new self($website_url) : $website_url;

        if ($link->isRelativeUrl()) {
            if ($link->isRelativePath()) {
                // Relative path: resolve against the directory of the website url
                return $link->getAbsoluteUrl($website->getBaseUrl($website->getBasePath()));
            }

            return $link->getAbsoluteUrl($website->getBaseUrl());
        } elseif ($link->isProtocolRelative()) {
            $link->setScheme($website->getScheme());
        }

        return $link->getAbsoluteUrl();
    }

    /**
     * Shortcut method to get a base url.
     *
     * @static
     *
     * @param string $url
     *
     * @return string
     */
    public static function base($url)
    {
        $link = new self($url);

        return $link->getBaseUrl();
    }

    /**
     * Get the base URL (scheme + host + port).
     *
     * @param string $suffix Add a suffix to the url
     *
     * @return string Empty string when the url has no host
     */
    public function getBaseUrl($suffix = '')
    {
        return $this->hasHost() ? $this->getScheme('://').$this->getHost().$this->getPort(':').$suffix : '';
    }

    /**
     * Get the absolute URL.
     *
     * @param string $base_url Use this url as base url
     *
     * @return string Empty string when no base url is given and the url has no host
     */
    public function getAbsoluteUrl($base_url = '')
    {
        if ($base_url) {
            $base = new self($base_url);
            // The full path always starts with "/", drop it before appending to the base
            $url = $base->getAbsoluteUrl().substr($this->getFullPath(), 1);
        } else {
            $url = $this->hasHost() ? $this->getBaseUrl().$this->getFullPath() : '';
        }

        return $url;
    }

    /**
     * Return true if the url is relative.
     *
     * @return bool
     */
    public function isRelativeUrl()
    {
        return !$this->hasScheme() && !$this->isProtocolRelative();
    }

    /**
     * Return true if the path is relative.
     *
     * @return bool
     */
    public function isRelativePath()
    {
        $path = $this->getPath();

        // Square brackets: the curly brace offset syntax is a parse error since PHP 8.0
        return empty($path) || $path[0] !== '/';
    }

    /**
     * Filters the path of a URI.
     *
     * Percent-encodes every character that is not allowed in a path,
     * without encoding existing percent-encoded sequences a second time.
     *
     * Imported from Guzzle library: https://github.com/guzzle/psr7/blob/master/src/Uri.php#L568-L582
     *
     * @param string $path           Path to filter
     * @param string $charUnreserved Unreserved characters (regex character class content)
     * @param string $charSubDelims  Sub-delimiter characters (regex character class content)
     *
     * @return string
     */
    public function filterPath($path, $charUnreserved = 'a-zA-Z0-9_\-\.~', $charSubDelims = '!\$&\'\(\)\*\+,;=')
    {
        return preg_replace_callback(
            '/(?:[^'.$charUnreserved.$charSubDelims.':@\/%]+|%(?![A-Fa-f0-9]{2}))/',
            function (array $matches) { return rawurlencode($matches[0]); },
            $path
        );
    }

    /**
     * Get the path.
     *
     * @return string
     */
    public function getPath()
    {
        return $this->filterPath(empty($this->components['path']) ? '' : $this->components['path']);
    }

    /**
     * Get the base path.
     *
     * @return string
     */
    public function getBasePath()
    {
        $current_path = $this->getPath();

        $path = $this->isRelativePath() ? '/' : '';
        $path .= substr($current_path, -1) === '/' ? $current_path : dirname($current_path);

        // Collapse "\/" and "//" into a single slash
        return preg_replace('/\\\\\/|\/\//', '/', $path.'/');
    }

    /**
     * Get the full path (path + querystring + fragment).
     *
     * @return string
     */
    public function getFullPath()
    {
        $path = $this->isRelativePath() ? '/' : '';
        $path .= $this->getPath();
        $path .= empty($this->components['query']) ? '' : '?'.$this->components['query'];
        $path .= empty($this->components['fragment']) ? '' : '#'.$this->components['fragment'];

        return $path;
    }

    /**
     * Get the hostname.
     *
     * @return string
     */
    public function getHost()
    {
        return empty($this->components['host']) ? '' : $this->components['host'];
    }

    /**
     * Return true if the url has a hostname.
     *
     * @return bool
     */
    public function hasHost()
    {
        return !empty($this->components['host']);
    }

    /**
     * Get the scheme ("http" when the url has none).
     *
     * @param string $suffix Suffix appended to the scheme
     *
     * @return string
     */
    public function getScheme($suffix = '')
    {
        return ($this->hasScheme() ? $this->components['scheme'] : 'http').$suffix;
    }

    /**
     * Set the scheme.
     *
     * @param string $scheme Set a scheme
     */
    public function setScheme($scheme)
    {
        $this->components['scheme'] = $scheme;
    }

    /**
     * Return true if the url has a scheme.
     *
     * @return bool
     */
    public function hasScheme()
    {
        return !empty($this->components['scheme']);
    }

    /**
     * Get the port.
     *
     * @param string $prefix Prefix to add when there is a port
     *
     * @return string
     */
    public function getPort($prefix = '')
    {
        return $this->hasPort() ? $prefix.$this->components['port'] : '';
    }

    /**
     * Return true if the url has a port.
     *
     * @return bool
     */
    public function hasPort()
    {
        return !empty($this->components['port']);
    }

    /**
     * Return true if the url is protocol relative (start with //).
     *
     * @return bool
     */
    public function isProtocolRelative()
    {
        return strpos($this->url, '//') === 0;
    }
}
|
96
vendor/fguillot/picofeed/lib/PicoFeed/Config/Config.php
vendored
Normal file
96
vendor/fguillot/picofeed/lib/PicoFeed/Config/Config.php
vendored
Normal file
|
@ -0,0 +1,96 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Config;
|
||||
|
||||
/**
|
||||
* Config class.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*
|
||||
* @method \PicoFeed\Config\Config setClientTimeout(integer $value)
|
||||
* @method \PicoFeed\Config\Config setClientUserAgent(string $value)
|
||||
* @method \PicoFeed\Config\Config setMaxRedirections(integer $value)
|
||||
* @method \PicoFeed\Config\Config setMaxBodySize(integer $value)
|
||||
* @method \PicoFeed\Config\Config setProxyHostname(string $value)
|
||||
* @method \PicoFeed\Config\Config setProxyPort(integer $value)
|
||||
* @method \PicoFeed\Config\Config setProxyUsername(string $value)
|
||||
* @method \PicoFeed\Config\Config setProxyPassword(string $value)
|
||||
* @method \PicoFeed\Config\Config setGrabberRulesFolder(string $value)
|
||||
* @method \PicoFeed\Config\Config setGrabberTimeout(integer $value)
|
||||
* @method \PicoFeed\Config\Config setGrabberUserAgent(string $value)
|
||||
* @method \PicoFeed\Config\Config setParserHashAlgo(string $value)
|
||||
* @method \PicoFeed\Config\Config setContentFiltering(boolean $value)
|
||||
* @method \PicoFeed\Config\Config setTimezone(string $value)
|
||||
* @method \PicoFeed\Config\Config setFilterIframeWhitelist(array $value)
|
||||
* @method \PicoFeed\Config\Config setFilterIntegerAttributes(array $value)
|
||||
* @method \PicoFeed\Config\Config setFilterAttributeOverrides(array $value)
|
||||
* @method \PicoFeed\Config\Config setFilterRequiredAttributes(array $value)
|
||||
* @method \PicoFeed\Config\Config setFilterMediaBlacklist(array $value)
|
||||
* @method \PicoFeed\Config\Config setFilterMediaAttributes(array $value)
|
||||
* @method \PicoFeed\Config\Config setFilterSchemeWhitelist(array $value)
|
||||
* @method \PicoFeed\Config\Config setFilterWhitelistedTags(array $value)
|
||||
* @method \PicoFeed\Config\Config setFilterBlacklistedTags(array $value)
|
||||
* @method \PicoFeed\Config\Config setFilterImageProxyUrl($value)
|
||||
* @method \PicoFeed\Config\Config setFilterImageProxyCallback($closure)
|
||||
* @method \PicoFeed\Config\Config setFilterImageProxyProtocol($value)
|
||||
* @method integer getClientTimeout()
|
||||
* @method string getClientUserAgent()
|
||||
* @method integer getMaxRedirections()
|
||||
* @method integer getMaxBodySize()
|
||||
* @method string getProxyHostname()
|
||||
* @method integer getProxyPort()
|
||||
* @method string getProxyUsername()
|
||||
* @method string getProxyPassword()
|
||||
* @method string getGrabberRulesFolder()
|
||||
* @method integer getGrabberTimeout()
|
||||
* @method string getGrabberUserAgent()
|
||||
* @method string getParserHashAlgo()
|
||||
* @method boolean getContentFiltering(bool $default_value)
|
||||
* @method string getTimezone()
|
||||
* @method array getFilterIframeWhitelist(array $default_value)
|
||||
* @method array getFilterIntegerAttributes(array $default_value)
|
||||
* @method array getFilterAttributeOverrides(array $default_value)
|
||||
* @method array getFilterRequiredAttributes(array $default_value)
|
||||
* @method array getFilterMediaBlacklist(array $default_value)
|
||||
* @method array getFilterMediaAttributes(array $default_value)
|
||||
* @method array getFilterSchemeWhitelist(array $default_value)
|
||||
* @method array getFilterWhitelistedTags(array $default_value)
|
||||
* @method array getFilterBlacklistedTags(array $default_value)
|
||||
* @method string getFilterImageProxyUrl()
|
||||
* @method \Closure getFilterImageProxyCallback()
|
||||
* @method string getFilterImageProxyProtocol()
|
||||
*/
|
||||
class Config
{
    /**
     * Parameter values indexed by lower-cased parameter name.
     *
     * @var array
     */
    private $container = array();

    /**
     * Magic method providing generic setters and getters.
     *
     * "setFoo($value)" stores the value under "foo" and returns the
     * instance; "getFoo($default)" returns the stored value or the given
     * default (null when omitted). A setter called without a value (or
     * with null) and any other method name do nothing and return null.
     *
     * @param string $name      Getter/Setter name
     * @param array  $arguments Method arguments
     *
     * @return mixed
     */
    public function __call($name, array $arguments)
    {
        $method = strtolower($name);
        $action = substr($method, 0, 3);
        $key = substr($method, 3);
        $has_argument = isset($arguments[0]);

        if ($action === 'get') {
            if (isset($this->container[$key])) {
                return $this->container[$key];
            }

            return $has_argument ? $arguments[0] : null;
        }

        if ($action === 'set' && $has_argument) {
            $this->container[$key] = $arguments[0];

            return $this;
        }

        return null;
    }
}
|
33
vendor/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php
vendored
Normal file
33
vendor/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php
vendored
Normal file
|
@ -0,0 +1,33 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Encoding;
|
||||
|
||||
/**
|
||||
* Encoding class.
|
||||
*/
|
||||
class Encoding
{
    /**
     * Convert a string to UTF-8.
     *
     * The input is returned untouched when the encoding is 'utf-8' or
     * empty, or when the conversion fails.
     *
     * @param string $input    String to convert
     * @param string $encoding Encoding of the input
     *
     * @return string
     */
    public static function convert($input, $encoding)
    {
        if (in_array($encoding, array('utf-8', ''), true)) {
            return $input;
        }

        // iconv raises a notice for charsets it does not accept and there is
        // no way to silence only that one, so mute every notice during the call
        set_error_handler(function () {}, E_NOTICE);

        // target UTF-8 and drop the characters that cannot be represented
        $converted = iconv($encoding, 'UTF-8//IGNORE', $input);

        // back to the previous error handler
        restore_error_handler();

        // a failed conversion falls back to the raw input, it may still be usable
        return $converted === false ? $input : $converted;
    }
}
|
699
vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php
vendored
Normal file
699
vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php
vendored
Normal file
|
@ -0,0 +1,699 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Filter;
|
||||
|
||||
use PicoFeed\Client\Url;
|
||||
|
||||
/**
|
||||
* Attribute Filter class.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class Attribute
|
||||
{
|
||||
/**
|
||||
* Image proxy url.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $image_proxy_url = '';
|
||||
|
||||
/**
|
||||
* Image proxy callback.
|
||||
*
|
||||
* @var \Closure|null
|
||||
*/
|
||||
private $image_proxy_callback = null;
|
||||
|
||||
/**
|
||||
* limits the image proxy usage to this protocol.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $image_proxy_limit_protocol = '';
|
||||
|
||||
/**
|
||||
* Tags and attribute whitelist.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $attribute_whitelist = array(
|
||||
'audio' => array('controls', 'src'),
|
||||
'video' => array('poster', 'controls', 'height', 'width', 'src'),
|
||||
'source' => array('src', 'type'),
|
||||
'dt' => array(),
|
||||
'dd' => array(),
|
||||
'dl' => array(),
|
||||
'table' => array(),
|
||||
'caption' => array(),
|
||||
'tr' => array(),
|
||||
'th' => array(),
|
||||
'td' => array(),
|
||||
'tbody' => array(),
|
||||
'thead' => array(),
|
||||
'h2' => array(),
|
||||
'h3' => array(),
|
||||
'h4' => array(),
|
||||
'h5' => array(),
|
||||
'h6' => array(),
|
||||
'strong' => array(),
|
||||
'em' => array(),
|
||||
'code' => array(),
|
||||
'pre' => array(),
|
||||
'blockquote' => array(),
|
||||
'p' => array(),
|
||||
'ul' => array(),
|
||||
'li' => array(),
|
||||
'ol' => array(),
|
||||
'br' => array(),
|
||||
'del' => array(),
|
||||
'a' => array('href'),
|
||||
'img' => array('src', 'title', 'alt'),
|
||||
'figure' => array(),
|
||||
'figcaption' => array(),
|
||||
'cite' => array(),
|
||||
'time' => array('datetime'),
|
||||
'abbr' => array('title'),
|
||||
'iframe' => array('width', 'height', 'frameborder', 'src', 'allowfullscreen'),
|
||||
'q' => array('cite'),
|
||||
);
|
||||
|
||||
/**
|
||||
* Scheme whitelist.
|
||||
*
|
||||
* For a complete list go to http://en.wikipedia.org/wiki/URI_scheme
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $scheme_whitelist = array(
|
||||
'bitcoin:',
|
||||
'callto:',
|
||||
'ed2k://',
|
||||
'facetime://',
|
||||
'feed:',
|
||||
'ftp://',
|
||||
'geo:',
|
||||
'git://',
|
||||
'http://',
|
||||
'https://',
|
||||
'irc://',
|
||||
'irc6://',
|
||||
'ircs://',
|
||||
'jabber:',
|
||||
'magnet:',
|
||||
'mailto:',
|
||||
'nntp://',
|
||||
'rtmp://',
|
||||
'sftp://',
|
||||
'sip:',
|
||||
'sips:',
|
||||
'skype:',
|
||||
'smb://',
|
||||
'sms:',
|
||||
'spotify:',
|
||||
'ssh:',
|
||||
'steam:',
|
||||
'svn://',
|
||||
'tel:',
|
||||
);
|
||||
|
||||
/**
|
||||
* Iframe source whitelist, everything else is ignored.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $iframe_whitelist = array(
|
||||
'http://www.youtube.com',
|
||||
'https://www.youtube.com',
|
||||
'http://player.vimeo.com',
|
||||
'https://player.vimeo.com',
|
||||
'http://www.dailymotion.com',
|
||||
'https://www.dailymotion.com',
|
||||
'http://vk.com',
|
||||
'https://vk.com',
|
||||
);
|
||||
|
||||
/**
|
||||
* Blacklisted resources.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $media_blacklist = array(
|
||||
'api.flattr.com',
|
||||
'feeds.feedburner.com',
|
||||
'share.feedsportal.com',
|
||||
'da.feedsportal.com',
|
||||
'rc.feedsportal.com',
|
||||
'rss.feedsportal.com',
|
||||
'res.feedsportal.com',
|
||||
'res1.feedsportal.com',
|
||||
'res2.feedsportal.com',
|
||||
'res3.feedsportal.com',
|
||||
'pi.feedsportal.com',
|
||||
'rss.nytimes.com',
|
||||
'feeds.wordpress.com',
|
||||
'stats.wordpress.com',
|
||||
'rss.cnn.com',
|
||||
'twitter.com/home?status=',
|
||||
'twitter.com/share',
|
||||
'twitter_icon_large.png',
|
||||
'www.facebook.com/sharer.php',
|
||||
'facebook_icon_large.png',
|
||||
'plus.google.com/share',
|
||||
'www.gstatic.com/images/icons/gplus-16.png',
|
||||
'www.gstatic.com/images/icons/gplus-32.png',
|
||||
'www.gstatic.com/images/icons/gplus-64.png',
|
||||
);
|
||||
|
||||
/**
|
||||
* Attributes used for external resources.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $media_attributes = array(
|
||||
'src',
|
||||
'href',
|
||||
'poster',
|
||||
);
|
||||
|
||||
/**
|
||||
* Attributes that must be integer.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $integer_attributes = array(
|
||||
'width',
|
||||
'height',
|
||||
'frameborder',
|
||||
);
|
||||
|
||||
/**
|
||||
* Mandatory attributes for specified tags.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $required_attributes = array(
|
||||
'a' => array('href'),
|
||||
'img' => array('src'),
|
||||
'iframe' => array('src'),
|
||||
'audio' => array('src'),
|
||||
'source' => array('src'),
|
||||
);
|
||||
|
||||
/**
|
||||
* Add attributes to specified tags.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $add_attributes = array(
|
||||
'a' => array('rel' => 'noreferrer', 'target' => '_blank'),
|
||||
'video' => array('controls' => 'true'),
|
||||
);
|
||||
|
||||
/**
|
||||
* List of filters to apply.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $filters = array(
|
||||
'filterAllowedAttribute',
|
||||
'filterIntegerAttribute',
|
||||
'rewriteAbsoluteUrl',
|
||||
'filterIframeAttribute',
|
||||
'filterBlacklistResourceAttribute',
|
||||
'filterProtocolUrlAttribute',
|
||||
'rewriteImageProxyUrl',
|
||||
'secureIframeSrc',
|
||||
'removeYouTubeAutoplay',
|
||||
);
|
||||
|
||||
/**
|
||||
* Website url instance used to resolve relative urls.
|
||||
*
|
||||
* @var \PicoFeed\Client\Url
|
||||
*/
|
||||
private $website;
|
||||
|
||||
/**
 * Constructor.
 *
 * Stores the website url; rewriteAbsoluteUrl() resolves resource
 * attributes against it.
 *
 * @param \PicoFeed\Client\Url $website Website url instance
 */
public function __construct(Url $website)
{
    $this->website = $website;
}
|
||||
|
||||
/**
 * Apply filters to the attributes list.
 *
 * Every filter listed in $this->filters is called for each attribute,
 * in order. A filter may rewrite the value (it is passed by reference);
 * as soon as one filter returns false the attribute is removed and the
 * remaining filters are skipped for that attribute.
 *
 * @param string $tag        Tag name
 * @param array  $attributes Attributes dictionary
 *
 * @return array Filtered attributes
 */
public function filter($tag, array $attributes)
{
    foreach ($attributes as $attribute => &$value) {
        foreach ($this->filters as $filter) {
            if (!$this->$filter($tag, $attribute, $value)) {
                unset($attributes[$attribute]);
                break;
            }
        }
    }

    // Break the reference left by the foreach so the returned array
    // does not carry a reference slot for its last element
    unset($value);

    return $attributes;
}
|
||||
|
||||
/**
 * Return true if the attribute is whitelisted for the tag
 * (attributes that are not allowed are removed).
 *
 * @param string $tag       Tag name
 * @param string $attribute Attribute name
 * @param string $value     Attribute value
 *
 * @return bool
 */
public function filterAllowedAttribute($tag, $attribute, $value)
{
    // Unknown tag: no attribute is allowed at all
    if (!isset($this->attribute_whitelist[$tag])) {
        return false;
    }

    return in_array($attribute, $this->attribute_whitelist[$tag]);
}
|
||||
|
||||
/**
 * Return false if an integer attribute does not contain only digits
 * (attributes that should have an integer value but don't are removed).
 *
 * Attributes that are not listed as integer attributes are always accepted.
 *
 * @param string $tag       Tag name
 * @param string $attribute Attribute name
 * @param string $value     Attribute value
 *
 * @return bool
 */
public function filterIntegerAttribute($tag, $attribute, $value)
{
    $must_be_integer = in_array($attribute, $this->integer_attributes);

    return !$must_be_integer || ctype_digit($value);
}
|
||||
|
||||
/**
 * Return true if the iframe source is allowed (remove not allowed iframe).
 *
 * The source must start with one of the whitelisted urls. When the
 * whitelisted url has no path (scheme and host only), the match must also
 * end on a host boundary, otherwise "http://www.youtube.com.evil.tld/" or
 * "http://www.youtube.com@evil.tld/" would be accepted as youtube.com.
 *
 * @param string $tag       Tag name
 * @param string $attribute Attribute name
 * @param string $value     Attribute value
 *
 * @return bool
 */
public function filterIframeAttribute($tag, $attribute, $value)
{
    if ($tag === 'iframe' && $attribute === 'src') {
        foreach ($this->iframe_whitelist as $url) {
            if (strpos($value, $url) !== 0) {
                continue;
            }

            // The whitelisted url already contains a path: the host part
            // is matched in full, anything may follow
            $path = parse_url($url, PHP_URL_PATH);

            if (!empty($path)) {
                return true;
            }

            // Host-only entry: the next character must end the host
            $next = (string) substr($value, strlen($url), 1);

            if ($next === '' || strpos('/?#', $next) !== false) {
                return true;
            }
        }

        return false;
    }

    return true;
}
|
||||
|
||||
/**
 * Return false if the attribute is a resource attribute pointing to a
 * blacklisted media (blacklisted resource attributes are removed).
 *
 * @param string $tag       Tag name
 * @param string $attribute Attribute name
 * @param string $value     Attribute value
 *
 * @return bool
 */
public function filterBlacklistResourceAttribute($tag, $attribute, $value)
{
    return !($this->isResource($attribute) && $this->isBlacklistedMedia($value));
}
|
||||
|
||||
/**
 * Rewrite the value of resource attributes as an absolute url,
 * resolved against the website url.
 *
 * @param string $tag       Tag name
 * @param string $attribute Attribute name
 * @param string $value     Attribute value (modified in place)
 *
 * @return bool Always true, the attribute is never removed
 */
public function rewriteAbsoluteUrl($tag, $attribute, &$value)
{
    // Only resource attributes carry an url
    if (!$this->isResource($attribute)) {
        return true;
    }

    $value = Url::resolve($value, $this->website);

    return true;
}
|
||||
|
||||
/**
 * Switch the src attribute of iframes from http to https to prevent
 * mixed active content.
 *
 * @param string $tag       Tag name
 * @param string $attribute Attribute name
 * @param string $value     Attribute value (modified in place)
 *
 * @return bool Always true, the attribute is never removed
 */
public function secureIframeSrc($tag, $attribute, &$value)
{
    $is_iframe_src = $tag === 'iframe' && $attribute === 'src';

    if ($is_iframe_src && strpos($value, 'http://') === 0) {
        // Swap the 7 characters of "http://" for the secure scheme
        $value = 'https://'.substr($value, 7);
    }

    return true;
}
|
||||
|
||||
/**
 * Turn "autoplay=1" into "autoplay=0" in the src attribute of
 * YouTube iframes.
 *
 * @param string $tag       Tag name
 * @param string $attribute Attribute name
 * @param string $value     Attribute value (modified in place)
 *
 * @return bool Always true, the attribute is never removed
 */
public function removeYouTubeAutoplay($tag, $attribute, &$value)
{
    if ($tag !== 'iframe' || $attribute !== 'src') {
        return true;
    }

    // Group 1: everything up to "autoplay=", group 2: the flag, group 3: the rest
    $pattern = '%^(https://(?:www\.)?youtube.com/.*\?.*autoplay=)(1)(.*)%i';

    if (preg_match($pattern, $value)) {
        $value = preg_replace($pattern, '${1}0$3', $value);
    }

    return true;
}
|
||||
|
||||
/**
 * Rewrite image url to use with a proxy.
 *
 * Only the src attribute of img tags is rewritten. When a protocol limit
 * is set, only urls starting with that protocol are rewritten. The proxy
 * url (a sprintf format) takes precedence over the proxy callback.
 *
 * @param string $tag       Tag name
 * @param string $attribute Attribute name
 * @param string $value     Attribute value (modified in place)
 *
 * @return bool Always true, the attribute is never removed
 */
public function rewriteImageProxyUrl($tag, $attribute, &$value)
{
    if ($tag !== 'img' || $attribute !== 'src') {
        return true;
    }

    $protocol = $this->image_proxy_limit_protocol;

    // The url does not use the protocol the proxy is limited to
    if ($protocol !== '' && stripos($value, $protocol.':') !== 0) {
        return true;
    }

    if ($this->image_proxy_url) {
        $value = sprintf($this->image_proxy_url, rawurlencode($value));
    } elseif (is_callable($this->image_proxy_callback)) {
        $value = call_user_func($this->image_proxy_callback, $value);
    }

    return true;
}
|
||||
|
||||
/**
 * Return true if the scheme is authorized.
 *
 * Attributes that are not resources are always accepted; resource
 * attributes are accepted only when isAllowedProtocol() accepts the value.
 *
 * @param string $tag       Tag name (not used by this filter)
 * @param string $attribute Attribute name
 * @param string $value     Attribute value
 *
 * @return bool
 */
public function filterProtocolUrlAttribute($tag, $attribute, $value)
{
    return ! $this->isResource($attribute) || $this->isAllowedProtocol($value);
}
|
||||
|
||||
/**
 * Automatically add some attributes to specific tags.
 *
 * The configured attributes are merged with the array union operator, so
 * an attribute that is already present keeps its current value.
 *
 * @param string $tag        Tag name
 * @param array  $attributes Attributes list (name => value)
 *
 * @return array
 */
public function addAttributes($tag, array $attributes)
{
    if (! isset($this->add_attributes[$tag])) {
        return $attributes;
    }

    return $attributes + $this->add_attributes[$tag];
}
|
||||
|
||||
/**
 * Return true if all required attributes are present.
 *
 * Tags without any configured requirement are always accepted. The
 * requirement of a tag may be a list of attribute names or a single
 * attribute name.
 *
 * @param string $tag        Tag name
 * @param array  $attributes Attributes list (name => value)
 *
 * @return bool
 */
public function hasRequiredAttributes($tag, array $attributes)
{
    if (! isset($this->required_attributes[$tag])) {
        return true;
    }

    // Cast so that a scalar such as 'src' is checked like array('src')
    // instead of being skipped by foreach with a warning.
    foreach ((array) $this->required_attributes[$tag] as $attribute) {
        if (! isset($attributes[$attribute])) {
            return false;
        }
    }

    return true;
}
|
||||
|
||||
/**
 * Check if an attribute name is listed as a media (external resource) attribute.
 *
 * @param string $attribute Attribute name
 *
 * @return bool
 */
public function isResource($attribute)
{
    $media_attributes = $this->media_attributes;

    return in_array($attribute, $media_attributes);
}
|
||||
|
||||
/**
 * Detect if the protocol is allowed or not.
 *
 * The value is accepted when it starts with one of the whitelisted
 * scheme prefixes (case-sensitive comparison).
 *
 * @param string $value Attribute value
 *
 * @return bool
 */
public function isAllowedProtocol($value)
{
    $allowed = false;

    foreach ($this->scheme_whitelist as $protocol) {
        if (strpos($value, $protocol) === 0) {
            $allowed = true;
            break;
        }
    }

    return $allowed;
}
|
||||
|
||||
/**
 * Detect if an url is blacklisted.
 *
 * The url is blacklisted as soon as it contains one of the blacklist
 * entries as a substring.
 *
 * @param string $resource Attribute value (URL)
 *
 * @return bool
 */
public function isBlacklistedMedia($resource)
{
    $blacklisted = false;

    foreach ($this->media_blacklist as $name) {
        if (strpos($resource, $name) !== false) {
            $blacklisted = true;
            break;
        }
    }

    return $blacklisted;
}
|
||||
|
||||
/**
 * Convert the attribute list to html.
 *
 * Each pair is rendered as name="value" with the value escaped by
 * Filter::escape(); pairs are separated by a single space.
 *
 * @param array $attributes Attributes (name => value)
 *
 * @return string
 */
public function toHtml(array $attributes)
{
    $pairs = array_map(
        function ($name, $value) {
            return sprintf('%s="%s"', $name, Filter::escape($value));
        },
        array_keys($attributes),
        array_values($attributes)
    );

    return implode(' ', $pairs);
}
|
||||
|
||||
/**
 * Set whitelisted tags and attributes for each tag.
 *
 * An empty array is ignored and the current whitelist is kept.
 *
 * @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']]
 *
 * @return Attribute
 */
public function setWhitelistedAttributes(array $values)
{
    if (! empty($values)) {
        $this->attribute_whitelist = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set scheme whitelist.
 *
 * An empty array is ignored and the current whitelist is kept.
 *
 * @param array $values List of scheme prefixes: ['http://', 'ftp://']
 *
 * @return Attribute
 */
public function setSchemeWhitelist(array $values)
{
    if (! empty($values)) {
        $this->scheme_whitelist = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set media attributes (used to load external resources).
 *
 * An empty array is ignored and the current list is kept.
 *
 * @param array $values List of attribute names: ['src', 'href']
 *
 * @return Attribute
 */
public function setMediaAttributes(array $values)
{
    if (! empty($values)) {
        $this->media_attributes = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set blacklisted external resources.
 *
 * An empty array is ignored and the current blacklist is kept.
 *
 * @param array $values List of URL fragments: ['http://google.com/', '...']
 *
 * @return Attribute
 */
public function setMediaBlacklist(array $values)
{
    if (! empty($values)) {
        $this->media_blacklist = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set mandatory attributes for whitelisted tags.
 *
 * An empty array is ignored and the current requirements are kept.
 *
 * @param array $values Required attribute names per tag: ['img' => ['src']]
 *
 * @return Attribute
 */
public function setRequiredAttributes(array $values)
{
    if (! empty($values)) {
        $this->required_attributes = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set attributes to add automatically to specific tags.
 *
 * An empty array is ignored and the current configuration is kept.
 *
 * @param array $values Attributes (name => value) per tag: ['a' => ['target' => '_blank']]
 *
 * @return Attribute
 */
public function setAttributeOverrides(array $values)
{
    if (! empty($values)) {
        $this->add_attributes = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set attributes that must be an integer.
 *
 * An empty array is ignored and the current list is kept.
 *
 * @param array $values List of attribute names: ['width', 'height']
 *
 * @return Attribute
 */
public function setIntegerAttributes(array $values)
{
    if (! empty($values)) {
        $this->integer_attributes = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set allowed iframe resources.
 *
 * An empty array is ignored and the current whitelist is kept.
 *
 * @param array $values List of allowed iframe sources: ['http://www.youtube.com']
 *
 * @return Attribute
 */
public function setIframeWhitelist(array $values)
{
    if (! empty($values)) {
        $this->iframe_whitelist = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set image proxy URL.
 *
 * The original image url will be urlencoded. A falsy value is ignored and
 * the current proxy URL is kept.
 *
 * @param string $url Proxy URL
 *
 * @return Attribute
 */
public function setImageProxyUrl($url)
{
    if ($url) {
        $this->image_proxy_url = $url;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set image proxy callback.
 *
 * A falsy value is ignored and the current callback is kept.
 *
 * @param \Closure $callback
 *
 * @return Attribute
 */
public function setImageProxyCallback($callback)
{
    if ($callback) {
        $this->image_proxy_callback = $callback;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set image proxy protocol restriction.
 *
 * A falsy value is ignored and the current restriction is kept.
 *
 * @param string $value Protocol name
 *
 * @return Attribute
 */
public function setImageProxyProtocol($value)
{
    if ($value) {
        $this->image_proxy_limit_protocol = $value;
    }

    return $this;
}
|
||||
}
|
155
vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php
vendored
Normal file
155
vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php
vendored
Normal file
|
@ -0,0 +1,155 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Filter;
|
||||
|
||||
/**
 * Filter class.
 *
 * Helpers used to clean up HTML/XML strings.
 *
 * @author Frederic Guillot
 */
class Filter
{
    /**
     * Get the Html filter instance.
     *
     * @static
     *
     * @param string $html    HTML content
     * @param string $website Site URL (used to build absolute URL)
     *
     * @return Html
     */
    public static function html($html, $website)
    {
        return new Html($html, $website);
    }

    /**
     * Escape HTML content.
     *
     * Existing HTML entities are not encoded a second time.
     *
     * @static
     *
     * @param string $content Raw content
     *
     * @return string
     */
    public static function escape($content)
    {
        return htmlspecialchars($content, ENT_QUOTES, 'UTF-8', false);
    }

    /**
     * Remove the doctype and the html, head and body tags (opening and closing).
     *
     * Only the tags themselves are removed, not what they enclose.
     *
     * @param string $data Input data
     *
     * @return string
     */
    public function removeHTMLTags($data)
    {
        return preg_replace('~<(?:!DOCTYPE|/?(?:html|head|body))[^>]*>\s*~i', '', $data);
    }

    /**
     * Remove the XML tag from a document.
     *
     * Everything up to the first "?>" is dropped when the document contains
     * an XML declaration, then the same is done for xml-stylesheet
     * instructions. Data without any "?>" terminator is left untouched.
     *
     * @static
     *
     * @param string $data Input data
     *
     * @return string
     */
    public static function stripXmlTag($data)
    {
        if (strpos($data, '<?xml') !== false) {
            $end = strpos($data, '?>');

            // Without terminator strpos() returns false; "false + 2" would
            // silently chop the first two bytes of the document.
            if ($end !== false) {
                $data = ltrim(substr($data, $end + 2));
            }
        }

        do {
            $pos = strpos($data, '<?xml-stylesheet ');

            if ($pos !== false) {
                $end = strpos($data, '?>');

                if ($end === false) {
                    // Unterminated instruction: nothing safe to strip
                    break;
                }

                $data = ltrim(substr($data, $end + 2));
            }
        } while ($pos !== false && $pos < 200);

        return $data;
    }

    /**
     * Strip head tag from the HTML content.
     *
     * Only a real head element is matched (not header for example). The
     * input is returned unchanged when the regex engine fails, e.g. on
     * invalid UTF-8.
     *
     * @static
     *
     * @param string $data Input data
     *
     * @return string
     */
    public static function stripHeadTags($data)
    {
        $stripped = preg_replace('@<head(?:\s[^>]*)?>.*?</head>@siu', '', $data);

        return $stripped === null ? $data : $stripped;
    }

    /**
     * Trim whitespace from the beginning, the end and inside a string and don't break utf-8 string.
     *
     * Carriage returns, tabs and line feeds are each replaced by one space.
     *
     * @static
     *
     * @param string $value Raw data
     *
     * @return string Normalized data
     */
    public static function stripWhiteSpace($value)
    {
        // str_replace() is used on purpose: preg_replace('/\s+/', ...) breaks utf-8
        $value = str_replace(array("\r", "\t", "\n"), ' ', $value);

        return trim($value);
    }

    /**
     * Fixes before XML parsing.
     *
     * Removes numeric entities and raw characters that are not valid
     * XML 1.0 characters.
     *
     * @static
     *
     * @param string $data Raw data
     *
     * @return string Normalized data
     */
    public static function normalizeData($data)
    {
        $entities = array(
            '/(&#)(\d+);/m', // decimal encoded
            '/(&#x)([a-f0-9]+);/mi', // hex encoded
        );

        // strip invalid XML 1.0 characters which are encoded as entities
        $data = preg_replace_callback($entities, function ($matches) {
            $code_point = $matches[2];

            // convert hex entity to decimal
            if (strtolower($matches[1]) === '&#x') {
                $code_point = hexdec($code_point);
            }

            $code_point = (int) $code_point;

            // replace invalid characters
            if ($code_point < 9
                || ($code_point > 10 && $code_point < 13)
                || ($code_point > 13 && $code_point < 32)
                || ($code_point > 55295 && $code_point < 57344)
                || ($code_point > 65533 && $code_point < 65536)
                || $code_point > 1114111
            ) {
                return '';
            }

            return $matches[0];
        }, $data);

        // strip every utf-8 character that isn't in the range of valid XML 1.0 characters
        return (string) preg_replace('/[^\x{0009}\x{000A}\x{000D}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]/u', '', $data);
    }
}
|
243
vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php
vendored
Normal file
243
vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php
vendored
Normal file
|
@ -0,0 +1,243 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Filter;
|
||||
|
||||
use PicoFeed\Config\Config;
|
||||
use PicoFeed\Client\Url;
|
||||
use PicoFeed\Scraper\RuleLoader;
|
||||
use PicoFeed\Parser\XmlParser;
|
||||
|
||||
/**
 * HTML Filter class.
 *
 * Parses the input with the XML parser and rebuilds the output from the
 * tags and attributes accepted by the Tag and Attribute filters.
 *
 * @author Frederic Guillot
 */
class Html
{
    /**
     * Config object.
     *
     * @var \PicoFeed\Config\Config
     */
    private $config;

    /**
     * Unfiltered XML data.
     *
     * @var string
     */
    private $input = '';

    /**
     * Filtered XML data.
     *
     * @var string
     */
    private $output = '';

    /**
     * Stack of "empty" flags, one entry per tag currently open.
     *
     * @var array
     */
    private $empty_tags = array();

    /**
     * Empty flag of the last opened tag (true when the tag was dropped).
     *
     * @var bool
     */
    private $empty = true;

    /**
     * Tag instance.
     *
     * @var \PicoFeed\Filter\Tag
     */
    public $tag = '';

    /**
     * Attribute instance.
     *
     * @var \PicoFeed\Filter\Attribute
     */
    public $attribute = '';

    /**
     * The website to filter.
     *
     * @var string
     */
    private $website;

    /**
     * Initialize the filter, all inputs data must be encoded in UTF-8 before.
     *
     * @param string $html    HTML content
     * @param string $website Site URL (used to build absolute URL)
     */
    public function __construct($html, $website)
    {
        $this->website = $website;
        $this->config = new Config();
        $this->input = XmlParser::htmlToXml($html);
        $this->output = '';
        $this->tag = new Tag($this->config);
        $this->attribute = new Attribute(new Url($website));
    }

    /**
     * Set config object and forward its filter settings to the tag and
     * attribute filters.
     *
     * @param \PicoFeed\Config\Config $config Config instance
     *
     * @return \PicoFeed\Filter\Html
     */
    public function setConfig($config)
    {
        $this->config = $config;

        if ($config === null) {
            return $this;
        }

        $attribute = $this->attribute;

        $attribute->setImageProxyCallback($config->getFilterImageProxyCallback());
        $attribute->setImageProxyUrl($config->getFilterImageProxyUrl());
        $attribute->setImageProxyProtocol($config->getFilterImageProxyProtocol());
        $attribute->setIframeWhitelist($config->getFilterIframeWhitelist(array()));
        $attribute->setIntegerAttributes($config->getFilterIntegerAttributes(array()));
        $attribute->setAttributeOverrides($config->getFilterAttributeOverrides(array()));
        $attribute->setRequiredAttributes($config->getFilterRequiredAttributes(array()));
        $attribute->setMediaBlacklist($config->getFilterMediaBlacklist(array()));
        $attribute->setMediaAttributes($config->getFilterMediaAttributes(array()));
        $attribute->setSchemeWhitelist($config->getFilterSchemeWhitelist(array()));
        $attribute->setWhitelistedAttributes($config->getFilterWhitelistedTags(array()));

        $this->tag->setWhitelistedTags(array_keys($config->getFilterWhitelistedTags(array())));

        return $this;
    }

    /**
     * Run tags/attributes filtering.
     *
     * @return string Filtered content
     */
    public function execute()
    {
        $this->preFilter();

        $xml_parser = xml_parser_create();

        // startTag(), endTag() and dataTag() of this object receive the events
        xml_set_object($xml_parser, $this);
        xml_set_element_handler($xml_parser, 'startTag', 'endTag');
        xml_set_character_data_handler($xml_parser, 'dataTag');
        xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, false);
        xml_parse($xml_parser, $this->input, true);
        xml_parser_free($xml_parser);

        $this->postFilter();

        return $this->output;
    }

    /**
     * Called before XML parsing: removes the blacklisted tags from the input.
     */
    public function preFilter()
    {
        $this->input = $this->tag->removeBlacklistedTags($this->input);
    }

    /**
     * Called after XML parsing: removes empty tags, applies the website
     * rules, collapses repeated line breaks and trims the output.
     */
    public function postFilter()
    {
        $html = $this->tag->removeEmptyTags($this->output);
        $html = $this->filterRules($html);
        $html = $this->tag->removeMultipleBreakTags($html);

        $this->output = trim($html);
    }

    /**
     * Apply the "filter" rules defined for the website.
     *
     * Each rule set whose pattern matches the full path of the website URL
     * is applied to the content as a list of regex search/replace pairs.
     *
     * @param string $content the content that should be filtered
     *
     * @return string
     */
    public function filterRules($content)
    {
        // the constructor should require a config, then this fallback can be removed
        $config = $this->config === null ? new Config() : $this->config;

        $loader = new RuleLoader($config);
        $rules = $loader->getRules($this->website);

        $url = new Url($this->website);
        $sub_url = $url->getFullPath();

        if (! isset($rules['filter'])) {
            return $content;
        }

        foreach ($rules['filter'] as $pattern => $rule) {
            if (! preg_match($pattern, $sub_url)) {
                continue;
            }

            foreach ($rule as $search => $replace) {
                $content = preg_replace($search, $replace, $content);
            }
        }

        return $content;
    }

    /**
     * Parse opening tag.
     *
     * The tag is written to the output only when it is allowed and still
     * has its required attributes after attribute filtering.
     *
     * @param resource $parser     XML parser
     * @param string   $tag        Tag name
     * @param array    $attributes Tag attributes
     */
    public function startTag($parser, $tag, array $attributes)
    {
        $is_kept = false;

        if ($this->tag->isAllowed($tag, $attributes)) {
            $filtered = $this->attribute->filter($tag, $attributes);

            if ($this->attribute->hasRequiredAttributes($tag, $filtered)) {
                $filtered = $this->attribute->addAttributes($tag, $filtered);

                $this->output .= $this->tag->openHtmlTag($tag, $this->attribute->toHtml($filtered));
                $is_kept = true;
            }
        }

        // Remember the decision so that endTag() can mirror it
        $this->empty = ! $is_kept;
        $this->empty_tags[] = $this->empty;
    }

    /**
     * Parse closing tag.
     *
     * @param resource $parser XML parser
     * @param string   $tag    Tag name
     */
    public function endTag($parser, $tag)
    {
        $was_dropped = array_pop($this->empty_tags);

        if ($was_dropped) {
            return;
        }

        if ($this->tag->isAllowedTag($tag)) {
            $this->output .= $this->tag->closeHtmlTag($tag);
        }
    }

    /**
     * Parse tag content.
     *
     * @param resource $parser  XML parser
     * @param string   $content Tag content
     */
    public function dataTag($parser, $content)
    {
        // Replace non-breaking spaces (UTF-8 bytes C2 A0) with normal spaces
        $text = str_replace("\xc2\xa0", ' ', $content);

        $this->output .= Filter::escape($text);
    }
}
|
215
vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php
vendored
Normal file
215
vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php
vendored
Normal file
|
@ -0,0 +1,215 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Filter;
|
||||
|
||||
use DOMXPath;
|
||||
use PicoFeed\Base;
|
||||
use PicoFeed\Parser\XmlParser;
|
||||
|
||||
/**
 * Tag Filter class.
 *
 * @author Frederic Guillot
 */
class Tag extends Base
{
    /**
     * Tags blacklist (Xpath expressions).
     *
     * @var array
     */
    private $tag_blacklist = array(
        '//script',
        '//style',
    );

    /**
     * Tags whitelist.
     *
     * @var array
     */
    private $tag_whitelist = array(
        'audio',
        'video',
        'source',
        'dt',
        'dd',
        'dl',
        'table',
        'caption',
        'tr',
        'th',
        'td',
        'tbody',
        'thead',
        'h2',
        'h3',
        'h4',
        'h5',
        'h6',
        'strong',
        'em',
        'code',
        'pre',
        'blockquote',
        'p',
        'ul',
        'li',
        'ol',
        'br',
        'del',
        'a',
        'img',
        'figure',
        'figcaption',
        'cite',
        'time',
        'abbr',
        'iframe',
        'q',
    );

    /**
     * Check if the tag is allowed and is not a pixel tracker.
     *
     * @param string $tag        Tag name
     * @param array  $attributes Attributes dictionary
     *
     * @return bool
     */
    public function isAllowed($tag, array $attributes)
    {
        if (! $this->isAllowedTag($tag)) {
            return false;
        }

        return ! $this->isPixelTracker($tag, $attributes);
    }

    /**
     * Return the HTML opening tag.
     *
     * Self-closing tags are terminated with "/>".
     *
     * @param string $tag        Tag name
     * @param string $attributes Attributes converted in html
     *
     * @return string
     */
    public function openHtmlTag($tag, $attributes = '')
    {
        $html = '<'.$tag;

        if (! empty($attributes)) {
            $html .= ' '.$attributes;
        }

        return $html.($this->isSelfClosingTag($tag) ? '/>' : '>');
    }

    /**
     * Return the HTML closing tag (empty string for self-closing tags).
     *
     * @param string $tag Tag name
     *
     * @return string
     */
    public function closeHtmlTag($tag)
    {
        if ($this->isSelfClosingTag($tag)) {
            return '';
        }

        return '</'.$tag.'>';
    }

    /**
     * Return true if the tag is self-closing (br or img).
     *
     * @param string $tag Tag name
     *
     * @return bool
     */
    public function isSelfClosingTag($tag)
    {
        return in_array($tag, array('br', 'img'), true);
    }

    /**
     * Check if a tag is on the whitelist.
     *
     * The tags whitelisted in the config are accepted in addition to the
     * whitelist of this instance.
     *
     * @param string $tag Tag name
     *
     * @return bool
     */
    public function isAllowedTag($tag)
    {
        $configured = array_keys($this->config->getFilterWhitelistedTags(array()));
        $allowed = array_merge($this->tag_whitelist, $configured);

        return in_array($tag, $allowed);
    }

    /**
     * Detect if an image tag is a pixel tracker (height and width both equal to 1).
     *
     * @param string $tag        Tag name
     * @param array  $attributes Tag attributes
     *
     * @return bool
     */
    public function isPixelTracker($tag, array $attributes)
    {
        if ($tag !== 'img') {
            return false;
        }

        if (! isset($attributes['height']) || ! isset($attributes['width'])) {
            return false;
        }

        return $attributes['height'] == 1 && $attributes['width'] == 1;
    }

    /**
     * Remove the blacklisted tags (script and style by default).
     *
     * @param string $data Input data
     *
     * @return string Serialized document, or an empty string when the input cannot be loaded
     */
    public function removeBlacklistedTags($data)
    {
        $document = XmlParser::getDomDocument($data);

        if ($document === false) {
            return '';
        }

        $xpath = new DOMXpath($document);
        $expression = implode(' | ', $this->tag_blacklist);

        foreach ($xpath->query($expression) as $blacklisted) {
            $blacklisted->parentNode->removeChild($blacklisted);
        }

        return $document->saveXML();
    }

    /**
     * Remove empty tags.
     *
     * @param string $data Input data
     *
     * @return string
     */
    public function removeEmptyTags($data)
    {
        $pattern = '/<([^<\/>]*)>([\s]*?|(?R))<\/\1>/imsU';

        return preg_replace($pattern, '', $data);
    }

    /**
     * Replace <br/><br/> by only one.
     *
     * @param string $data Input data
     *
     * @return string
     */
    public function removeMultipleBreakTags($data)
    {
        $pattern = "/(<br\s*\/?>\s*)+/";

        return preg_replace($pattern, '<br/>', $data);
    }

    /**
     * Set whitelisted tags.
     *
     * An empty array is ignored and the current whitelist is kept.
     *
     * @param array $values List of tag names: ['video', 'img']
     *
     * @return Tag
     */
    public function setWhitelistedTags(array $values)
    {
        if (! empty($values)) {
            $this->tag_whitelist = $values;
        }

        return $this;
    }
}
|
23
vendor/fguillot/picofeed/lib/PicoFeed/Generator/ContentGeneratorInterface.php
vendored
Normal file
23
vendor/fguillot/picofeed/lib/PicoFeed/Generator/ContentGeneratorInterface.php
vendored
Normal file
|
@ -0,0 +1,23 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Generator;
|
||||
|
||||
use PicoFeed\Parser\Item;
|
||||
|
||||
/**
 * Content Generator Interface
 *
 * Contract of the classes able to generate the content of a feed item.
 *
 * @package PicoFeed\Generator
 * @author  Frederic Guillot
 */
interface ContentGeneratorInterface
{
    /**
     * Execute Content Generator
     *
     * @access public
     * @param  Item $item Feed item to generate content for
     * @return boolean
     */
    public function execute(Item $item);
}
|
36
vendor/fguillot/picofeed/lib/PicoFeed/Generator/FileContentGenerator.php
vendored
Normal file
36
vendor/fguillot/picofeed/lib/PicoFeed/Generator/FileContentGenerator.php
vendored
Normal file
|
@ -0,0 +1,36 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Generator;
|
||||
|
||||
use PicoFeed\Base;
|
||||
use PicoFeed\Parser\Item;
|
||||
|
||||
/**
 * File Content Generator
 *
 * Replaces the content of items that point to a file (pdf) by a link to
 * that file.
 *
 * @package PicoFeed\Generator
 * @author  Frederic Guillot
 */
class FileContentGenerator extends Base implements ContentGeneratorInterface
{
    /**
     * URL suffixes handled by this generator.
     *
     * @var array
     */
    private $extensions = array('pdf');

    /**
     * Execute Content Generator
     *
     * @access public
     * @param  Item $item
     * @return boolean True when the item URL ends with a handled suffix and the content was set
     */
    public function execute(Item $item)
    {
        $url = $item->getUrl();

        foreach ($this->extensions as $extension) {
            if (substr($url, - strlen($extension)) === $extension) {
                // The URL comes from the feed: escape it before building HTML,
                // without double-encoding entities that are already present.
                $safe_url = htmlspecialchars($url, ENT_QUOTES, 'UTF-8', false);

                $item->setContent('<a href="'.$safe_url.'" target="_blank">'.$safe_url.'</a>');

                return true;
            }
        }

        return false;
    }
}
|
67
vendor/fguillot/picofeed/lib/PicoFeed/Generator/YoutubeContentGenerator.php
vendored
Normal file
67
vendor/fguillot/picofeed/lib/PicoFeed/Generator/YoutubeContentGenerator.php
vendored
Normal file
|
@ -0,0 +1,67 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Generator;
|
||||
|
||||
use PicoFeed\Base;
|
||||
use PicoFeed\Parser\Item;
|
||||
|
||||
/**
 * Youtube Content Generator
 *
 * Replaces the content of YouTube items by an embedded player.
 *
 * @package PicoFeed\Generator
 * @author  Frederic Guillot
 */
class YoutubeContentGenerator extends Base implements ContentGeneratorInterface
{
    /**
     * Execute Content Generator
     *
     * Uses the yt:videoId tag when the item has the "yt" namespace,
     * otherwise falls back to the item URL.
     *
     * @access public
     * @param  Item $item
     * @return boolean
     */
    public function execute(Item $item)
    {
        if ($item->hasNamespace('yt')) {
            return $this->generateHtmlFromXml($item);
        }

        return $this->generateHtmlFromUrl($item);
    }

    /**
     * Generate HTML from the yt:videoId tag of the item
     *
     * @access private
     * @param  Item $item
     * @return boolean
     */
    private function generateHtmlFromXml(Item $item)
    {
        $videoId = $item->getTag('yt:videoId');

        if (! empty($videoId)) {
            $item->setContent($this->buildEmbedHtml($videoId[0]));

            return true;
        }

        return false;
    }

    /**
     * Generate HTML from item URL
     *
     * Only the video id itself (letters, digits, "_" and "-") is captured,
     * so extra query parameters after it do not end up in the embed URL.
     *
     * @access public
     * @param  Item $item
     * @return bool
     */
    public function generateHtmlFromUrl(Item $item)
    {
        if (preg_match('/youtube\.com\/watch\?v=([a-zA-Z0-9_-]+)/', $item->getUrl(), $matches)) {
            $item->setContent($this->buildEmbedHtml($matches[1]));

            return true;
        }

        return false;
    }

    /**
     * Build the iframe markup of the embedded player.
     *
     * @access private
     * @param  string $videoId Video id
     * @return string
     */
    private function buildEmbedHtml($videoId)
    {
        // The id comes from the feed: escape it before building HTML
        $safe_id = htmlspecialchars((string) $videoId, ENT_QUOTES, 'UTF-8', false);

        return '<iframe width="560" height="315" src="//www.youtube.com/embed/'.$safe_id.'" frameborder="0"></iframe>';
    }
}
|
114
vendor/fguillot/picofeed/lib/PicoFeed/Logging/Logger.php
vendored
Normal file
114
vendor/fguillot/picofeed/lib/PicoFeed/Logging/Logger.php
vendored
Normal file
|
@ -0,0 +1,114 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Logging;
|
||||
|
||||
use DateTime;
|
||||
use DateTimeZone;
|
||||
|
||||
/**
 * Logging class.
 *
 * Keeps timestamped messages in memory once logging has been enabled.
 *
 * @author Frederic Guillot
 */
class Logger
{
    /**
     * List of messages.
     *
     * @static
     *
     * @var array
     */
    private static $messages = array();

    /**
     * Default timezone.
     *
     * @static
     *
     * @var string
     */
    private static $timezone = 'UTC';

    /**
     * Enable or disable logging.
     *
     * @static
     *
     * @var bool
     */
    public static $enable = false;

    /**
     * Enable logging.
     *
     * @static
     */
    public static function enable()
    {
        self::$enable = true;
    }

    /**
     * Add a new message, prefixed with the current date and time.
     *
     * Nothing is recorded while logging is disabled.
     *
     * @static
     *
     * @param string $message Message
     */
    public static function setMessage($message)
    {
        if (! self::$enable) {
            return;
        }

        $now = new DateTime('now', new DateTimeZone(self::$timezone));

        self::$messages[] = sprintf('[%s] %s', $now->format('Y-m-d H:i:s'), $message);
    }

    /**
     * Get all logged messages.
     *
     * @static
     *
     * @return array
     */
    public static function getMessages()
    {
        return self::$messages;
    }

    /**
     * Remove all logged messages.
     *
     * @static
     */
    public static function deleteMessages()
    {
        self::$messages = array();
    }

    /**
     * Set a different timezone.
     *
     * A falsy value is ignored and the current timezone is kept.
     *
     * @static
     *
     * @see http://php.net/manual/en/timezones.php
     *
     * @param string $timezone Timezone
     */
    public static function setTimeZone($timezone)
    {
        if ($timezone) {
            self::$timezone = $timezone;
        }
    }

    /**
     * Get all messages serialized into a string.
     *
     * Messages are separated by PHP_EOL and one PHP_EOL is always appended.
     *
     * @static
     *
     * @return string
     */
    public static function toString()
    {
        $lines = implode(PHP_EOL, self::$messages);

        return $lines.PHP_EOL;
    }
}
|
364
vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php
vendored
Normal file
364
vendor/fguillot/picofeed/lib/PicoFeed/Parser/Atom.php
vendored
Normal file
|
@ -0,0 +1,364 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
use SimpleXMLElement;
|
||||
use PicoFeed\Filter\Filter;
|
||||
use PicoFeed\Client\Url;
|
||||
|
||||
/**
|
||||
* Atom parser.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class Atom extends Parser
|
||||
{
|
||||
/**
|
||||
* Supported namespaces.
|
||||
*/
|
||||
protected $namespaces = array(
|
||||
'atom' => 'http://www.w3.org/2005/Atom',
|
||||
);
|
||||
|
||||
/**
 * Get the path to the items XML tree.
 *
 * The namespaced "atom:entry" path is tried first; "entry" is used when
 * that lookup gives an empty result.
 *
 * @param SimpleXMLElement $xml Feed xml
 *
 * @return SimpleXMLElement
 */
public function getItemsTree(SimpleXMLElement $xml)
{
    $items = XmlParser::getXPathResult($xml, 'atom:entry', $this->namespaces);

    if (! $items) {
        $items = XmlParser::getXPathResult($xml, 'entry');
    }

    return $items;
}
|
||||
|
||||
/**
 * Find the feed url (link with rel "self").
 *
 * @param SimpleXMLElement      $xml  Feed xml
 * @param \PicoFeed\Parser\Feed $feed Feed object
 */
public function findFeedUrl(SimpleXMLElement $xml, Feed $feed)
{
    $feed_url = $this->getUrl($xml, 'self');

    $feed->setFeedUrl($feed_url);
}
|
||||
|
||||
/**
 * Find the site url (link with rel "alternate").
 *
 * @param SimpleXMLElement      $xml  Feed xml
 * @param \PicoFeed\Parser\Feed $feed Feed object
 */
public function findSiteUrl(SimpleXMLElement $xml, Feed $feed)
{
    $site_url = $this->getUrl($xml, 'alternate', true);

    $feed->setSiteUrl($site_url);
}
|
||||
|
||||
/**
 * Find the feed description.
 *
 * The namespaced "atom:subtitle" path is tried first; "subtitle" is used
 * when that lookup gives an empty result.
 *
 * @param SimpleXMLElement      $xml  Feed xml
 * @param \PicoFeed\Parser\Feed $feed Feed object
 */
public function findFeedDescription(SimpleXMLElement $xml, Feed $feed)
{
    $subtitle = XmlParser::getXPathResult($xml, 'atom:subtitle', $this->namespaces);

    if (! $subtitle) {
        $subtitle = XmlParser::getXPathResult($xml, 'subtitle');
    }

    $feed->setDescription(XmlParser::getValue($subtitle));
}
|
||||
|
||||
/**
|
||||
* Find the feed logo url.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedLogo(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
$logo = XmlParser::getXPathResult($xml, 'atom:logo', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($xml, 'logo');
|
||||
|
||||
$feed->setLogo(XmlParser::getValue($logo));
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed icon.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedIcon(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
$icon = XmlParser::getXPathResult($xml, 'atom:icon', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($xml, 'icon');
|
||||
|
||||
$feed->setIcon(XmlParser::getValue($icon));
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed title.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedTitle(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
$title = XmlParser::getXPathResult($xml, 'atom:title', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($xml, 'title');
|
||||
|
||||
$feed->setTitle(Filter::stripWhiteSpace(XmlParser::getValue($title)) ?: $feed->getSiteUrl());
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed language.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
$language = XmlParser::getXPathResult($xml, '*[not(self::atom:entry)]/@xml:lang', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($xml, '@xml:lang');
|
||||
|
||||
$feed->setLanguage(XmlParser::getValue($language));
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed id.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedId(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
$id = XmlParser::getXPathResult($xml, 'atom:id', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($xml, 'id');
|
||||
|
||||
$feed->setId(XmlParser::getValue($id));
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed date.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedDate(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
$updated = XmlParser::getXPathResult($xml, 'atom:updated', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($xml, 'updated');
|
||||
|
||||
$feed->setDate($this->getDateParser()->getDateTime(XmlParser::getValue($updated)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item date.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
$published = XmlParser::getXPathResult($entry, 'atom:published', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($entry, 'published');
|
||||
|
||||
$updated = XmlParser::getXPathResult($entry, 'atom:updated', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($entry, 'updated');
|
||||
|
||||
$published = !empty($published) ? $this->getDateParser()->getDateTime((string) current($published)) : null;
|
||||
$updated = !empty($updated) ? $this->getDateParser()->getDateTime((string) current($updated)) : null;
|
||||
|
||||
if ($published === null && $updated === null) {
|
||||
$item->setDate($feed->getDate()); // We use the feed date if there is no date for the item
|
||||
} elseif ($published !== null && $updated !== null) {
|
||||
$item->setDate(max($published, $updated)); // We use the most recent date between published and updated
|
||||
} else {
|
||||
$item->setDate($updated ?: $published);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item title.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
*/
|
||||
public function findItemTitle(SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
$title = XmlParser::getXPathResult($entry, 'atom:title', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($entry, 'title');
|
||||
|
||||
$item->setTitle(Filter::stripWhiteSpace(XmlParser::getValue($title)) ?: $item->getUrl());
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item author.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
$author = XmlParser::getXPathResult($entry, 'atom:author/atom:name', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($entry, 'author/name')
|
||||
?: XmlParser::getXPathResult($xml, 'atom:author/atom:name', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($xml, 'author/name');
|
||||
|
||||
$item->setAuthor(XmlParser::getValue($author));
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item content.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
public function findItemContent(SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
$item->setContent($this->getContent($entry));
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item URL.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
public function findItemUrl(SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
$item->setUrl($this->getUrl($entry, 'alternate', true));
|
||||
}
|
||||
|
||||
/**
|
||||
* Genereate the item id.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
$id = XmlParser::getXPathResult($entry, 'atom:id', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($entry, 'id');
|
||||
|
||||
if (!empty($id)) {
|
||||
$item->setId($this->generateId(XmlParser::getValue($id)));
|
||||
} else {
|
||||
$item->setId($this->generateId(
|
||||
$item->getTitle(), $item->getUrl(), $item->getContent()
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item enclosure.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
$enclosure = $this->findLink($entry, 'enclosure');
|
||||
|
||||
if ($enclosure) {
|
||||
$item->setEnclosureUrl(Url::resolve((string) $enclosure['href'], $feed->getSiteUrl()));
|
||||
$item->setEnclosureType((string) $enclosure['type']);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item language.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
$language = XmlParser::getXPathResult($entry, './/@xml:lang');
|
||||
$item->setLanguage(XmlParser::getValue($language) ?: $feed->getLanguage());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the URL from a link tag.
|
||||
*
|
||||
* @param SimpleXMLElement $xml XML tag
|
||||
* @param string $rel Link relationship: alternate, enclosure, related, self, via
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
private function getUrl(SimpleXMLElement $xml, $rel, $fallback = false)
|
||||
{
|
||||
$link = $this->findLink($xml, $rel);
|
||||
|
||||
if ($link) {
|
||||
return (string) $link['href'];
|
||||
}
|
||||
|
||||
if ($fallback) {
|
||||
$link = $this->findLink($xml, '');
|
||||
return $link ? (string) $link['href'] : '';
|
||||
}
|
||||
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a link tag that match a relationship.
|
||||
*
|
||||
* @param SimpleXMLElement $xml XML tag
|
||||
* @param string $rel Link relationship: alternate, enclosure, related, self, via
|
||||
*
|
||||
* @return SimpleXMLElement|null
|
||||
*/
|
||||
private function findLink(SimpleXMLElement $xml, $rel)
|
||||
{
|
||||
$links = XmlParser::getXPathResult($xml, 'atom:link', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($xml, 'link');
|
||||
|
||||
foreach ($links as $link) {
|
||||
if ($rel === (string) $link['rel']) {
|
||||
return $link;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the entry content.
|
||||
*
|
||||
* @param SimpleXMLElement $entry XML Entry
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
private function getContent(SimpleXMLElement $entry)
|
||||
{
|
||||
$content = current(
|
||||
XmlParser::getXPathResult($entry, 'atom:content', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($entry, 'content')
|
||||
);
|
||||
|
||||
if (!empty($content) && count($content->children())) {
|
||||
$xml_string = '';
|
||||
|
||||
foreach ($content->children() as $child) {
|
||||
$xml_string .= $child->asXML();
|
||||
}
|
||||
|
||||
return $xml_string;
|
||||
} elseif (trim((string) $content) !== '') {
|
||||
return (string) $content;
|
||||
}
|
||||
|
||||
$summary = XmlParser::getXPathResult($entry, 'atom:summary', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($entry, 'summary');
|
||||
|
||||
return (string) current($summary);
|
||||
}
|
||||
}
|
126
vendor/fguillot/picofeed/lib/PicoFeed/Parser/DateParser.php
vendored
Normal file
126
vendor/fguillot/picofeed/lib/PicoFeed/Parser/DateParser.php
vendored
Normal file
|
@ -0,0 +1,126 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
use DateTime;
|
||||
use DateTimeZone;
|
||||
use PicoFeed\Base;
|
||||
|
||||
/**
|
||||
* Date Parser.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class DateParser extends Base
{
    /**
     * Timezone used to parse feed dates when the config provides none.
     *
     * @access private
     * @var string
     */
    private $timezone = 'UTC';

    /**
     * Supported formats [ 'format' => length ].
     *
     * A null length means the value is parsed as-is; an integer length means
     * the value is cut to that many bytes before being matched to the format.
     *
     * @var array
     */
    public $formats = array(
        DATE_ATOM => null,
        DATE_RSS => null,
        DATE_COOKIE => null,
        DATE_ISO8601 => null,
        DATE_RFC822 => null,
        DATE_RFC850 => null,
        DATE_RFC1036 => null,
        DATE_RFC1123 => null,
        DATE_RFC2822 => null,
        DATE_RFC3339 => null,
        'D, d M Y H:i:s' => 25,
        'D, d M Y h:i:s' => 25,
        'D M d Y H:i:s' => 24,
        'j M Y H:i:s' => 20,
        'Y-m-d H:i:s' => 19,
        'Y-m-d\TH:i:s' => 19,
        'd/m/Y H:i:s' => 19,
        'D, d M Y' => 16,
        'Y-m-d' => 10,
        'd-m-Y' => 10,
        'm-d-Y' => 10,
        'd.m.Y' => 10,
        'm.d.Y' => 10,
        'd/m/Y' => 10,
        'm/d/Y' => 10,
    );

    /**
     * Try to parse all date format for broken feeds.
     *
     * Formats are tried in the order of $formats; the first one that parses
     * without error or warning wins. When none matches, the current date and
     * time is returned instead.
     *
     * @param string $value Original date format
     *
     * @return DateTime
     */
    public function getDateTime($value)
    {
        $value = trim($value);

        foreach ($this->formats as $format => $length) {
            $truncated_value = $value;
            if ($length !== null) {
                $truncated_value = substr($truncated_value, 0, $length);
            }

            $date = $this->getValidDate($format, $truncated_value);
            if ($date !== false) {
                return $date;
            }
        }

        return $this->getCurrentDateTime();
    }

    /**
     * Get a valid date from a given format.
     *
     * @param string $format Date format
     * @param string $value  Original date value
     *
     * @return DateTime|bool The parsed date, or false when the value does not
     *                       match the format cleanly (any error or warning)
     */
    public function getValidDate($format, $value)
    {
        $date = DateTime::createFromFormat($format, $value, $this->getTimeZone());

        if ($date === false) {
            return false;
        }

        $errors = DateTime::getLastErrors();

        // Since PHP 8.2 getLastErrors() returns false (not an array) when the
        // last parse produced neither errors nor warnings. Indexing that false
        // value used to make every valid date look invalid.
        if ($errors === false) {
            return $date;
        }

        if ($errors['error_count'] === 0 && $errors['warning_count'] === 0) {
            return $date;
        }

        return false;
    }

    /**
     * Get the current datetime.
     *
     * @return DateTime "now" expressed in the timezone from getTimeZone()
     */
    public function getCurrentDateTime()
    {
        return new DateTime('now', $this->getTimeZone());
    }

    /**
     * Get DateTimeZone instance
     *
     * Uses the timezone from the config when it is set, otherwise the default
     * timezone of this class.
     *
     * @access public
     * @return DateTimeZone
     */
    public function getTimeZone()
    {
        return new DateTimeZone($this->config->getTimezone() ?: $this->timezone);
    }
}
|
314
vendor/fguillot/picofeed/lib/PicoFeed/Parser/Feed.php
vendored
Normal file
314
vendor/fguillot/picofeed/lib/PicoFeed/Parser/Feed.php
vendored
Normal file
|
@ -0,0 +1,314 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
/**
|
||||
* Feed.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class Feed
{
    /**
     * Feed items.
     *
     * @var array
     */
    public $items = array();

    /**
     * Feed id.
     *
     * @var string
     */
    public $id = '';

    /**
     * Feed title.
     *
     * @var string
     */
    public $title = '';

    /**
     * Feed description.
     *
     * @var string
     */
    public $description = '';

    /**
     * Feed url.
     *
     * @var string
     */
    public $feedUrl = '';

    /**
     * Site url.
     *
     * @var string
     */
    public $siteUrl = '';

    /**
     * Feed date (null until setDate() is called).
     *
     * @var \DateTime|null
     */
    public $date = null;

    /**
     * Feed language.
     *
     * @var string
     */
    public $language = '';

    /**
     * Feed logo URL.
     *
     * @var string
     */
    public $logo = '';

    /**
     * Feed icon URL.
     *
     * @var string
     */
    public $icon = '';

    /**
     * Return feed information.
     *
     * One "Feed::name = value" line per property, followed by the string form
     * of every item, each preceded by a "----" separator line.
     *
     * @return string
     */
    public function __toString()
    {
        $output = '';

        foreach (array('id', 'title', 'feedUrl', 'siteUrl', 'language', 'description', 'logo') as $property) {
            $output .= 'Feed::'.$property.' = '.$this->$property.PHP_EOL;
        }

        // The date is null until setDate() is called; calling format() on it
        // would abort the string conversion with a fatal error.
        $date = $this->date !== null ? $this->date->format(DATE_RFC822) : '';

        $output .= 'Feed::date = '.$date.PHP_EOL;
        $output .= 'Feed::isRTL() = '.($this->isRTL() ? 'true' : 'false').PHP_EOL;
        $output .= 'Feed::items = '.count($this->items).' items'.PHP_EOL;

        foreach ($this->items as $item) {
            $output .= '----'.PHP_EOL;
            $output .= $item;
        }

        return $output;
    }

    /**
     * Get title.
     *
     * @return string
     */
    public function getTitle()
    {
        return $this->title;
    }

    /**
     * Get description.
     *
     * @return string
     */
    public function getDescription()
    {
        return $this->description;
    }

    /**
     * Get the logo url.
     *
     * @return string
     */
    public function getLogo()
    {
        return $this->logo;
    }

    /**
     * Get the icon url.
     *
     * @return string
     */
    public function getIcon()
    {
        return $this->icon;
    }

    /**
     * Get feed url.
     *
     * @return string
     */
    public function getFeedUrl()
    {
        return $this->feedUrl;
    }

    /**
     * Get site url.
     *
     * @return string
     */
    public function getSiteUrl()
    {
        return $this->siteUrl;
    }

    /**
     * Get date.
     *
     * @return \DateTime|null Null when no date has been set
     */
    public function getDate()
    {
        return $this->date;
    }

    /**
     * Get language.
     *
     * @return string
     */
    public function getLanguage()
    {
        return $this->language;
    }

    /**
     * Get id.
     *
     * @return string
     */
    public function getId()
    {
        return $this->id;
    }

    /**
     * Get feed items.
     *
     * @return array
     */
    public function getItems()
    {
        return $this->items;
    }

    /**
     * Return true if the feed is "Right to Left".
     *
     * @return bool
     */
    public function isRTL()
    {
        return Parser::isLanguageRTL($this->language);
    }

    /**
     * Set feed items.
     *
     * @param Item[] $items
     * @return Feed
     */
    public function setItems(array $items)
    {
        $this->items = $items;
        return $this;
    }

    /**
     * Set feed id.
     *
     * @param string $id
     * @return Feed
     */
    public function setId($id)
    {
        $this->id = $id;
        return $this;
    }

    /**
     * Set feed title.
     *
     * @param string $title
     * @return Feed
     */
    public function setTitle($title)
    {
        $this->title = $title;
        return $this;
    }

    /**
     * Set feed description.
     *
     * @param string $description
     * @return Feed
     */
    public function setDescription($description)
    {
        $this->description = $description;
        return $this;
    }

    /**
     * Set feed url.
     *
     * @param string $feedUrl
     * @return Feed
     */
    public function setFeedUrl($feedUrl)
    {
        $this->feedUrl = $feedUrl;
        return $this;
    }

    /**
     * Set feed website url.
     *
     * @param string $siteUrl
     * @return Feed
     */
    public function setSiteUrl($siteUrl)
    {
        $this->siteUrl = $siteUrl;
        return $this;
    }

    /**
     * Set feed date.
     *
     * @param \DateTime $date
     * @return Feed
     */
    public function setDate($date)
    {
        $this->date = $date;
        return $this;
    }

    /**
     * Set feed language.
     *
     * @param string $language
     * @return Feed
     */
    public function setLanguage($language)
    {
        $this->language = $language;
        return $this;
    }

    /**
     * Set feed logo.
     *
     * @param string $logo
     * @return Feed
     */
    public function setLogo($logo)
    {
        $this->logo = $logo;
        return $this;
    }

    /**
     * Set feed icon.
     *
     * @param string $icon
     * @return Feed
     */
    public function setIcon($icon)
    {
        $this->icon = $icon;
        return $this;
    }
}
|
415
vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php
vendored
Normal file
415
vendor/fguillot/picofeed/lib/PicoFeed/Parser/Item.php
vendored
Normal file
|
@ -0,0 +1,415 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
/**
|
||||
* Feed Item.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class Item
{
    /**
     * List of known RTL languages.
     *
     * @var string[]
     */
    public $rtl = array(
        'ar',  // Arabic (ar-**)
        'fa',  // Farsi (fa-**)
        'ur',  // Urdu (ur-**)
        'ps',  // Pashtu (ps-**)
        'syr', // Syriac (syr-**)
        'dv',  // Divehi (dv-**)
        'he',  // Hebrew (he-**)
        'yi',  // Yiddish (yi-**)
    );

    /**
     * Item id.
     *
     * @var string
     */
    public $id = '';

    /**
     * Item title.
     *
     * @var string
     */
    public $title = '';

    /**
     * Item url.
     *
     * @var string
     */
    public $url = '';

    /**
     * Item author.
     *
     * @var string
     */
    public $author = '';

    /**
     * Item date (null until setDate() is called).
     *
     * @var \DateTime|null
     */
    public $date = null;

    /**
     * Item content.
     *
     * @var string
     */
    public $content = '';

    /**
     * Item enclosure url.
     *
     * @var string
     */
    public $enclosureUrl = '';

    /**
     * Item enclosure type.
     *
     * @var string
     */
    public $enclosureType = '';

    /**
     * Item language.
     *
     * @var string
     */
    public $language = '';

    /**
     * Raw XML.
     *
     * @var \SimpleXMLElement
     */
    public $xml;

    /**
     * List of namespaces.
     *
     * @var array
     */
    public $namespaces = array();

    /**
     * Check if a XML namespace exists
     *
     * @access public
     * @param  string $namespace Key looked up in the namespaces list
     * @return bool
     */
    public function hasNamespace($namespace)
    {
        return array_key_exists($namespace, $this->namespaces);
    }

    /**
     * Get specific XML tag or attribute value.
     *
     * The tag is searched anywhere below the item's raw XML node.
     *
     * @param string $tag       Tag name (examples: guid, media:content)
     * @param string $attribute Tag attribute
     *
     * @return array|false Tag values or error
     */
    public function getTag($tag, $attribute = '')
    {
        if ($attribute !== '') {
            $attribute = '/@'.$attribute;
        }

        $query = './/'.$tag.$attribute;
        $elements = XmlParser::getXPathResult($this->xml, $query, $this->namespaces);

        if ($elements === false) { // xPath error
            return false;
        }

        return array_map(function ($element) { return (string) $element;}, $elements);
    }

    /**
     * Return item information.
     *
     * One "Item::name = value" line per property; the content is reported by
     * its length in bytes only.
     *
     * @return string
     */
    public function __toString()
    {
        $output = '';

        foreach (array('id', 'title', 'url', 'language', 'author', 'enclosureUrl', 'enclosureType') as $property) {
            $output .= 'Item::'.$property.' = '.$this->$property.PHP_EOL;
        }

        // The date is null until setDate() is called; calling format() on it
        // would abort the string conversion with a fatal error.
        $date = $this->date !== null ? $this->date->format(DATE_RFC822) : '';

        $output .= 'Item::date = '.$date.PHP_EOL;
        $output .= 'Item::isRTL() = '.($this->isRTL() ? 'true' : 'false').PHP_EOL;
        $output .= 'Item::content = '.strlen($this->content).' bytes'.PHP_EOL;

        return $output;
    }

    /**
     * Get title.
     *
     * @return string
     */
    public function getTitle()
    {
        return $this->title;
    }

    /**
     * Get URL
     *
     * @access public
     * @return string
     */
    public function getUrl()
    {
        return $this->url;
    }

    /**
     * Set URL
     *
     * @access public
     * @param  string $url
     * @return Item
     */
    public function setUrl($url)
    {
        $this->url = $url;
        return $this;
    }

    /**
     * Get id.
     *
     * @return string
     */
    public function getId()
    {
        return $this->id;
    }

    /**
     * Get date.
     *
     * @return \DateTime|null Null when no date has been set
     */
    public function getDate()
    {
        return $this->date;
    }

    /**
     * Get content.
     *
     * @return string
     */
    public function getContent()
    {
        return $this->content;
    }

    /**
     * Set content
     *
     * @access public
     * @param  string $value
     * @return Item
     */
    public function setContent($value)
    {
        $this->content = $value;
        return $this;
    }

    /**
     * Get enclosure url.
     *
     * @return string
     */
    public function getEnclosureUrl()
    {
        return $this->enclosureUrl;
    }

    /**
     * Get enclosure type.
     *
     * @return string
     */
    public function getEnclosureType()
    {
        return $this->enclosureType;
    }

    /**
     * Get language.
     *
     * @return string
     */
    public function getLanguage()
    {
        return $this->language;
    }

    /**
     * Get author.
     *
     * @return string
     */
    public function getAuthor()
    {
        return $this->author;
    }

    /**
     * Return true if the item is "Right to Left".
     *
     * @return bool
     */
    public function isRTL()
    {
        return Parser::isLanguageRTL($this->language);
    }

    /**
     * Set item id.
     *
     * @param string $id
     * @return Item
     */
    public function setId($id)
    {
        $this->id = $id;
        return $this;
    }

    /**
     * Set item title.
     *
     * @param string $title
     * @return Item
     */
    public function setTitle($title)
    {
        $this->title = $title;
        return $this;
    }

    /**
     * Set author.
     *
     * @param string $author
     * @return Item
     */
    public function setAuthor($author)
    {
        $this->author = $author;
        return $this;
    }

    /**
     * Set item date.
     *
     * @param \DateTime $date
     * @return Item
     */
    public function setDate($date)
    {
        $this->date = $date;
        return $this;
    }

    /**
     * Set enclosure url.
     *
     * @param string $enclosureUrl
     * @return Item
     */
    public function setEnclosureUrl($enclosureUrl)
    {
        $this->enclosureUrl = $enclosureUrl;
        return $this;
    }

    /**
     * Set enclosure type.
     *
     * @param string $enclosureType
     * @return Item
     */
    public function setEnclosureType($enclosureType)
    {
        $this->enclosureType = $enclosureType;
        return $this;
    }

    /**
     * Set item language.
     *
     * @param string $language
     * @return Item
     */
    public function setLanguage($language)
    {
        $this->language = $language;
        return $this;
    }

    /**
     * Set raw XML.
     *
     * @param \SimpleXMLElement $xml
     * @return Item
     */
    public function setXml($xml)
    {
        $this->xml = $xml;
        return $this;
    }

    /**
     * Get raw XML.
     *
     * @return \SimpleXMLElement
     */
    public function getXml()
    {
        return $this->xml;
    }

    /**
     * Set XML namespaces.
     *
     * @param array $namespaces
     * @return Item
     */
    public function setNamespaces($namespaces)
    {
        $this->namespaces = $namespaces;
        return $this;
    }

    /**
     * Get XML namespaces.
     *
     * @return array
     */
    public function getNamespaces()
    {
        return $this->namespaces;
    }
}
|
12
vendor/fguillot/picofeed/lib/PicoFeed/Parser/MalformedXmlException.php
vendored
Normal file
12
vendor/fguillot/picofeed/lib/PicoFeed/Parser/MalformedXmlException.php
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
/**
 * Exception raised when the feed content cannot be parsed as XML.
 *
 * Marker class only: it adds nothing to ParserException beyond its type.
 *
 * @author Frederic Guillot
 */
class MalformedXmlException extends ParserException
{
}
|
523
vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php
vendored
Normal file
523
vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php
vendored
Normal file
|
@ -0,0 +1,523 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
use PicoFeed\Processor\ContentFilterProcessor;
|
||||
use PicoFeed\Processor\ContentGeneratorProcessor;
|
||||
use PicoFeed\Processor\ItemPostProcessor;
|
||||
use PicoFeed\Processor\ScraperProcessor;
|
||||
use SimpleXMLElement;
|
||||
use PicoFeed\Client\Url;
|
||||
use PicoFeed\Encoding\Encoding;
|
||||
use PicoFeed\Filter\Filter;
|
||||
use PicoFeed\Logging\Logger;
|
||||
|
||||
/**
|
||||
* Base parser class.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
abstract class Parser
|
||||
{
|
||||
/**
|
||||
* Config object.
|
||||
*
|
||||
* @var \PicoFeed\Config\Config
|
||||
*/
|
||||
private $config;
|
||||
|
||||
/**
|
||||
* DateParser object.
|
||||
*
|
||||
* @var \PicoFeed\Parser\DateParser
|
||||
*/
|
||||
private $dateParser;
|
||||
|
||||
/**
|
||||
* Hash algorithm used to generate item id, any value supported by PHP, see hash_algos().
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $hash_algo = 'sha256';
|
||||
|
||||
/**
|
||||
* Feed content (XML data).
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $content = '';
|
||||
|
||||
/**
|
||||
* Fallback url.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $fallback_url = '';
|
||||
|
||||
/**
|
||||
* XML namespaces supported by parser.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $namespaces = array();
|
||||
|
||||
/**
|
||||
* XML namespaces used in document.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $used_namespaces = array();
|
||||
|
||||
/**
|
||||
* Item Post Processor instance
|
||||
*
|
||||
* @access private
|
||||
* @var ItemPostProcessor
|
||||
*/
|
||||
private $itemPostProcessor;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param string $content Feed content
|
||||
* @param string $http_encoding HTTP encoding (headers)
|
||||
* @param string $fallback_url Fallback url when the feed provide relative or broken url
|
||||
*/
|
||||
public function __construct($content, $http_encoding = '', $fallback_url = '')
{
    $this->fallback_url = $fallback_url;

    // Read the encoding declared in the XML tag before that tag is removed.
    $declared_encoding = XmlParser::getEncodingFromXmlTag($content);

    // Strip XML tag to avoid multiple encoding/decoding in the next XML processing
    $this->content = Filter::stripXmlTag($content);

    Logger::setMessage(get_called_class().': HTTP Encoding "'.$http_encoding.'" ; XML Encoding "'.$declared_encoding.'"');

    // Encode everything in UTF-8; the XML declaration wins over the HTTP header.
    $source_encoding = $declared_encoding ?: $http_encoding;
    $this->content = Encoding::convert($this->content, $source_encoding);

    // NOTE(review): $this->config is not assigned anywhere in this constructor,
    // so the processors receive whatever the property holds at this point.
    $post_processor = new ItemPostProcessor($this->config);
    $post_processor->register(new ContentGeneratorProcessor($this->config));
    $post_processor->register(new ContentFilterProcessor($this->config));

    $this->itemPostProcessor = $post_processor;
}
|
||||
|
||||
/**
|
||||
* Parse the document.
|
||||
*
|
||||
* @return \PicoFeed\Parser\Feed
|
||||
*/
|
||||
public function execute()
{
    $parser_name = get_called_class();

    Logger::setMessage($parser_name.': begin parsing');

    $document = XmlParser::getSimpleXml($this->content);

    if ($document === false) {
        // First attempt failed: normalize the raw data and parse once more.
        Logger::setMessage($parser_name.': Applying XML workarounds');
        $this->content = Filter::normalizeData($this->content);
        $document = XmlParser::getSimpleXml($this->content);
    }

    if ($document === false) {
        // Only reachable when the retry above failed as well.
        Logger::setMessage($parser_name.': XML parsing error');
        Logger::setMessage(XmlParser::getErrors());
        throw new MalformedXmlException('XML parsing error');
    }

    $this->used_namespaces = $document->getNamespaces(true);
    $document = $this->registerSupportedNamespaces($document);

    $feed = new Feed();

    // Feed-level properties. Each URL is validated right after it is found.
    $this->findFeedUrl($document, $feed);
    $this->checkFeedUrl($feed);

    $this->findSiteUrl($document, $feed);
    $this->checkSiteUrl($feed);

    $this->findFeedTitle($document, $feed);
    $this->findFeedDescription($document, $feed);
    $this->findFeedLanguage($document, $feed);
    $this->findFeedId($document, $feed);
    $this->findFeedDate($document, $feed);
    $this->findFeedLogo($document, $feed);
    $this->findFeedIcon($document, $feed);

    // Item-level properties.
    foreach ($this->getItemsTree($document) as $node) {
        $node = $this->registerSupportedNamespaces($node);

        $item = new Item();
        $item->xml = $node;
        $item->namespaces = $this->used_namespaces;

        $this->findItemAuthor($document, $node, $item);

        $this->findItemUrl($node, $item);
        $this->checkItemUrl($feed, $item);

        $this->findItemTitle($node, $item);
        $this->findItemContent($node, $item);

        // Id generation can use the item url/title/content (order is important)
        $this->findItemId($node, $item, $feed);
        $this->findItemDate($node, $item, $feed);
        $this->findItemEnclosure($node, $item, $feed);
        $this->findItemLanguage($node, $item, $feed);

        $this->itemPostProcessor->execute($feed, $item);
        $feed->items[] = $item;
    }

    Logger::setMessage($parser_name.PHP_EOL.$feed);

    return $feed;
}
|
||||
|
||||
/**
|
||||
* Check if the feed url is correct.
|
||||
*
|
||||
* @param Feed $feed Feed object
|
||||
*/
|
||||
public function checkFeedUrl(Feed $feed)
{
    // An empty feed url is replaced by the fallback url; anything else is
    // resolved against the fallback url.
    $feed->feedUrl = $feed->getFeedUrl() === ''
        ? $this->fallback_url
        : Url::resolve($feed->getFeedUrl(), $this->fallback_url);
}
|
||||
|
||||
/**
|
||||
* Check if the site url is correct.
|
||||
*
|
||||
* @param Feed $feed Feed object
|
||||
*/
|
||||
public function checkSiteUrl(Feed $feed)
{
    if ($feed->getSiteUrl() !== '') {
        // A site url was found: resolve it against the fallback url.
        $feed->siteUrl = Url::resolve($feed->getSiteUrl(), $this->fallback_url);

        return;
    }

    // No site url in the feed: derive it from the feed url.
    $feed->siteUrl = Url::base($feed->getFeedUrl());
}
|
||||
|
||||
/**
|
||||
* Check if the item url is correct.
|
||||
*
|
||||
* @param Feed $feed Feed object
|
||||
* @param Item $item Item object
|
||||
*/
|
||||
public function checkItemUrl(Feed $feed, Item $item)
{
    // The item url is resolved against the site url of the feed.
    $item_url = $item->getUrl();
    $site_url = $feed->getSiteUrl();

    $item->url = Url::resolve($item_url, $site_url);
}
|
||||
|
||||
/**
|
||||
* Get Item Post Processor instance
|
||||
*
|
||||
* @access public
|
||||
* @return ItemPostProcessor
|
||||
*/
|
||||
public function getItemPostProcessor()
{
    // Same instance that the constructor created and that execute() runs for each item.
    $chain = $this->itemPostProcessor;

    return $chain;
}
|
||||
|
||||
/**
|
||||
* Get DateParser instance
|
||||
*
|
||||
* @access public
|
||||
* @return DateParser
|
||||
*/
|
||||
public function getDateParser()
{
    // Lazily create the DateParser and keep it, so repeated calls (the
    // concrete parsers call this for the feed date and for every item date)
    // reuse one instance instead of building a new parser each time.
    if ($this->dateParser === null) {
        $this->dateParser = new DateParser($this->config);
    }

    return $this->dateParser;
}
|
||||
|
||||
/**
|
||||
* Generate a unique id for an entry (hash all arguments).
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function generateId()
{
    // Every argument is concatenated without any separator, then hashed
    // with the configured algorithm.
    $parts = func_get_args();
    $payload = implode('', $parts);

    return hash($this->hash_algo, $payload);
}
|
||||
|
||||
/**
|
||||
* Return true if the given language is "Right to Left".
|
||||
*
|
||||
* @static
|
||||
*
|
||||
* @param string $language Language: fr-FR, en-US
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public static function isLanguageRTL($language)
{
    // Language codes that mark a right-to-left language when the
    // lower-cased language string starts with one of them.
    $rtl_prefixes = array(
        'ar',  // Arabic (ar-**)
        'fa',  // Farsi (fa-**)
        'ur',  // Urdu (ur-**)
        'ps',  // Pashtu (ps-**)
        'syr', // Syriac (syr-**)
        'dv',  // Divehi (dv-**)
        'he',  // Hebrew (he-**)
        'yi',  // Yiddish (yi-**)
    );

    $candidate = strtolower($language);

    foreach ($rtl_prefixes as $code) {
        // Prefix comparison: true when $candidate begins with $code.
        if (strncmp($candidate, $code, strlen($code)) === 0) {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/**
|
||||
* Set Hash algorithm used for id generation.
|
||||
*
|
||||
* @param string $algo Algorithm name
|
||||
* @return \PicoFeed\Parser\Parser
|
||||
*/
|
||||
public function setHashAlgo($algo)
{
    // A falsy value (empty string, null, ...) leaves the current algorithm untouched.
    if ($algo) {
        $this->hash_algo = $algo;
    }

    return $this;
}
|
||||
|
||||
/**
|
||||
* Set config object.
|
||||
*
|
||||
* @param \PicoFeed\Config\Config $config Config instance
|
||||
*
|
||||
* @return \PicoFeed\Parser\Parser
|
||||
*/
|
||||
public function setConfig($config)
{
    // Only the parser's own reference is replaced here.
    $this->config = $config;

    // Fluent interface.
    return $this;
}
|
||||
|
||||
/**
|
||||
* Disable the content filtering.
|
||||
*
|
||||
* @return \PicoFeed\Parser\Parser
|
||||
*/
|
||||
public function disableContentFiltering()
{
    // Removes the content filter processor from the item post-processing chain.
    $processor_class = 'PicoFeed\Processor\ContentFilterProcessor';
    $this->itemPostProcessor->unregister($processor_class);

    return $this;
}
|
||||
|
||||
/**
|
||||
* Enable the content grabber.
|
||||
*
|
||||
* @param bool $needsRuleFile true if only pages with rule files should be
|
||||
* scraped
|
||||
* @param null|\Closure $scraperCallback Callback function that gets called for each
|
||||
* scraper execution
|
||||
*
|
||||
* @return \PicoFeed\Parser\Parser
|
||||
*/
|
||||
public function enableContentGrabber($needsRuleFile = false, $scraperCallback = null)
{
    $scraper_processor = new ScraperProcessor($this->config);

    // When a rule file is required, the scraper's candidate parser is switched off.
    if ($needsRuleFile) {
        $scraper_processor->getScraper()->disableCandidateParser();
    }

    // The callback is optional and only forwarded when one was given.
    if (null !== $scraperCallback) {
        $scraper_processor->setExecutionCallback($scraperCallback);
    }

    // Appended to the chain that execute() runs for every item.
    $this->itemPostProcessor->register($scraper_processor);

    return $this;
}
|
||||
|
||||
/**
|
||||
* Set ignored URLs for the content grabber.
|
||||
*
|
||||
* @param array $urls URLs
|
||||
*
|
||||
* @return \PicoFeed\Parser\Parser
|
||||
*/
|
||||
public function setGrabberIgnoreUrls(array $urls)
{
    // NOTE(review): presumably requires enableContentGrabber() to have been
    // called first so that the scraper processor is registered - confirm
    // what getProcessor() returns for an unregistered class.
    $scraper_processor = $this->itemPostProcessor->getProcessor('PicoFeed\Processor\ScraperProcessor');
    $scraper_processor->ignoreUrls($urls);

    return $this;
}
|
||||
|
||||
/**
|
||||
* Register all supported namespaces to be used within an xpath query.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
*
|
||||
* @return SimpleXMLElement
|
||||
*/
|
||||
public function registerSupportedNamespaces(SimpleXMLElement $xml)
{
    // Every prefix => URI pair declared by the concrete parser is made
    // known to the element so that prefixed XPath queries can be used on it.
    foreach (array_keys($this->namespaces) as $prefix) {
        $xml->registerXPathNamespace($prefix, $this->namespaces[$prefix]);
    }

    // The very same element is handed back to the caller.
    return $xml;
}
|
||||
|
||||
/**
|
||||
* Find the feed url.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
// First feed-level hook called by execute(); checkFeedUrl() runs right after it.
abstract public function findFeedUrl(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the site url.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
// Called by execute() after the feed url was checked; checkSiteUrl() runs right after it.
abstract public function findSiteUrl(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed title.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
// Called by execute() once the feed url and the site url have been checked.
abstract public function findFeedTitle(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed description.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
// Feed-level hook called by execute() right after findFeedTitle().
abstract public function findFeedDescription(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed language.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
// Feed-level hook called by execute() right after findFeedDescription().
abstract public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed id.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
// Called by execute() after the feed url and the site url have been checked.
abstract public function findFeedId(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed date.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
// Called by execute() before any item is processed.
abstract public function findFeedDate(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed logo url.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
// Feed-level hook called by execute() right after findFeedDate().
abstract public function findFeedLogo(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed icon.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
// Last feed-level hook called by execute() before it iterates over the items.
abstract public function findFeedIcon(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Get the path to the items XML tree.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
*
|
||||
* @return SimpleXMLElement
|
||||
*/
|
||||
// execute() iterates over the returned value with foreach; each element is handled as one item.
abstract public function getItemsTree(SimpleXMLElement $xml);
|
||||
|
||||
/**
|
||||
* Find the item author.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
// First item-level hook called by execute(); it also receives the whole document ($xml).
abstract public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item);
|
||||
|
||||
/**
|
||||
* Find the item URL.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
// Called by execute() right before checkItemUrl(), which resolves the url against the site url.
abstract public function findItemUrl(SimpleXMLElement $entry, Item $item);
|
||||
|
||||
/**
|
||||
* Find the item title.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
// Called by execute() after the item url has been found and checked.
abstract public function findItemTitle(SimpleXMLElement $entry, Item $item);
|
||||
|
||||
/**
|
||||
* Generate the item id.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
// Called by execute() after the item url, title and content hooks, since an id may be generated from them.
abstract public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the item date.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
// Called by execute() right after findItemId(); the feed object is passed along with the item.
abstract public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the item content.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
// Called by execute() after findItemTitle() and before findItemId().
abstract public function findItemContent(SimpleXMLElement $entry, Item $item);
|
||||
|
||||
/**
|
||||
* Find the item enclosure.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
// Called by execute() right after findItemDate().
abstract public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the item language.
|
||||
*
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
// Last item-level hook called by execute() before the item post-processors run.
abstract public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
}
|
14
vendor/fguillot/picofeed/lib/PicoFeed/Parser/ParserException.php
vendored
Normal file
14
vendor/fguillot/picofeed/lib/PicoFeed/Parser/ParserException.php
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
use PicoFeed\PicoFeedException;
|
||||
|
||||
/**
|
||||
* ParserException Exception.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
abstract class ParserException extends PicoFeedException
{
    // Declared abstract: it cannot be instantiated directly, only concrete
    // subclasses can be thrown. No members are added to PicoFeedException.
}
|
277
vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss10.php
vendored
Normal file
277
vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss10.php
vendored
Normal file
|
@ -0,0 +1,277 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
use SimpleXMLElement;
|
||||
use PicoFeed\Filter\Filter;
|
||||
|
||||
/**
|
||||
* RSS 1.0 parser.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class Rss10 extends Parser
{
    /**
     * Namespace prefixes (and their URIs) used by the XPath queries of this parser.
     */
    protected $namespaces = array(
        'rss' => 'http://purl.org/rss/1.0/',
        'dc' => 'http://purl.org/dc/elements/1.1/',
        'content' => 'http://purl.org/rss/1.0/modules/content/',
        'feedburner' => 'http://rssnamespace.org/feedburner/ext/1.0',
    );

    /**
     * Locate the item nodes of the document.
     *
     * Tries the namespaced query first, then the un-prefixed one, and
     * finally direct property access on the document.
     *
     * @param SimpleXMLElement $xml Feed xml
     *
     * @return SimpleXMLElement
     */
    public function getItemsTree(SimpleXMLElement $xml)
    {
        $items = XmlParser::getXPathResult($xml, 'rss:item', $this->namespaces);

        if (!$items) {
            $items = XmlParser::getXPathResult($xml, 'item');
        }

        if (!$items) {
            $items = $xml->item;
        }

        return $items;
    }

    /**
     * Find the feed url (this parser always sets an empty string).
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedUrl(SimpleXMLElement $xml, Feed $feed)
    {
        $feed->setFeedUrl('');
    }

    /**
     * Find the site url from the channel link.
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findSiteUrl(SimpleXMLElement $xml, Feed $feed)
    {
        $link = XmlParser::getXPathResult($xml, 'rss:channel/rss:link', $this->namespaces);

        if (!$link) {
            $link = XmlParser::getXPathResult($xml, 'channel/link');
        }

        if (!$link) {
            $link = $xml->channel->link;
        }

        $feed->setSiteUrl(XmlParser::getValue($link));
    }

    /**
     * Find the feed description from the channel description.
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedDescription(SimpleXMLElement $xml, Feed $feed)
    {
        $node = XmlParser::getXPathResult($xml, 'rss:channel/rss:description', $this->namespaces);

        if (!$node) {
            $node = XmlParser::getXPathResult($xml, 'channel/description');
        }

        if (!$node) {
            $node = $xml->channel->description;
        }

        $feed->setDescription(XmlParser::getValue($node));
    }

    /**
     * Find the feed logo url from the image url.
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedLogo(SimpleXMLElement $xml, Feed $feed)
    {
        $image_url = XmlParser::getXPathResult($xml, 'rss:image/rss:url', $this->namespaces);

        if (!$image_url) {
            $image_url = XmlParser::getXPathResult($xml, 'image/url');
        }

        $feed->setLogo(XmlParser::getValue($image_url));
    }

    /**
     * Find the feed icon (this parser always sets an empty string).
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedIcon(SimpleXMLElement $xml, Feed $feed)
    {
        $feed->setIcon('');
    }

    /**
     * Find the feed title; the site url is used when the title is empty.
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedTitle(SimpleXMLElement $xml, Feed $feed)
    {
        $node = XmlParser::getXPathResult($xml, 'rss:channel/rss:title', $this->namespaces);

        if (!$node) {
            $node = XmlParser::getXPathResult($xml, 'channel/title');
        }

        if (!$node) {
            $node = $xml->channel->title;
        }

        $clean_title = Filter::stripWhiteSpace(XmlParser::getValue($node));

        $feed->setTitle($clean_title ?: $feed->getSiteUrl());
    }

    /**
     * Find the feed language from the channel's dc:language element.
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed)
    {
        $node = XmlParser::getXPathResult($xml, 'rss:channel/dc:language', $this->namespaces);

        if (!$node) {
            $node = XmlParser::getXPathResult($xml, 'channel/dc:language', $this->namespaces);
        }

        $feed->setLanguage(XmlParser::getValue($node));
    }

    /**
     * Find the feed id: the feed url, or the site url when the former is empty.
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedId(SimpleXMLElement $xml, Feed $feed)
    {
        $identifier = $feed->getFeedUrl();

        if (!$identifier) {
            $identifier = $feed->getSiteUrl();
        }

        $feed->setId($identifier);
    }

    /**
     * Find the feed date from the channel's dc:date element.
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedDate(SimpleXMLElement $xml, Feed $feed)
    {
        $node = XmlParser::getXPathResult($xml, 'rss:channel/dc:date', $this->namespaces);

        if (!$node) {
            $node = XmlParser::getXPathResult($xml, 'channel/dc:date', $this->namespaces);
        }

        $date_parser = $this->getDateParser();

        $feed->setDate($date_parser->getDateTime(XmlParser::getValue($node)));
    }

    /**
     * Find the item date; the feed date is used when the item has no dc:date.
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param Item                  $item  Item object
     * @param \PicoFeed\Parser\Feed $feed  Feed object
     */
    public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed)
    {
        $node = XmlParser::getXPathResult($entry, 'dc:date', $this->namespaces);

        if (empty($node)) {
            $item->setDate($feed->getDate());
        } else {
            $item->setDate($this->getDateParser()->getDateTime(XmlParser::getValue($node)));
        }
    }

    /**
     * Find the item title; the item url is used when the title is empty.
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     */
    public function findItemTitle(SimpleXMLElement $entry, Item $item)
    {
        $node = XmlParser::getXPathResult($entry, 'rss:title', $this->namespaces);

        if (!$node) {
            $node = XmlParser::getXPathResult($entry, 'title');
        }

        if (!$node) {
            $node = $entry->title;
        }

        $clean_title = Filter::stripWhiteSpace(XmlParser::getValue($node));

        $item->setTitle($clean_title ?: $item->getUrl());
    }

    /**
     * Find the item author: the item's dc:creator, else the channel's one.
     *
     * @param SimpleXMLElement      $xml   Feed
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     */
    public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item)
    {
        $creator = XmlParser::getXPathResult($entry, 'dc:creator', $this->namespaces);

        if (!$creator) {
            $creator = XmlParser::getXPathResult($xml, 'rss:channel/dc:creator', $this->namespaces);
        }

        if (!$creator) {
            $creator = XmlParser::getXPathResult($xml, 'channel/dc:creator', $this->namespaces);
        }

        $item->setAuthor(XmlParser::getValue($creator));
    }

    /**
     * Find the item content: content:encoded, else the item description.
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     */
    public function findItemContent(SimpleXMLElement $entry, Item $item)
    {
        $body = XmlParser::getXPathResult($entry, 'content:encoded', $this->namespaces);

        // The description is only looked up when content:encoded yields an empty value.
        if (XmlParser::getValue($body) === '') {
            $body = XmlParser::getXPathResult($entry, 'rss:description', $this->namespaces);

            if (!$body) {
                $body = XmlParser::getXPathResult($entry, 'description');
            }

            if (!$body) {
                $body = $entry->description;
            }
        }

        $item->setContent(XmlParser::getValue($body));
    }

    /**
     * Find the item URL; feedburner:origLink is tried before the item link.
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     */
    public function findItemUrl(SimpleXMLElement $entry, Item $item)
    {
        $url_node = XmlParser::getXPathResult($entry, 'feedburner:origLink', $this->namespaces);

        if (!$url_node) {
            $url_node = XmlParser::getXPathResult($entry, 'rss:link', $this->namespaces);
        }

        if (!$url_node) {
            $url_node = XmlParser::getXPathResult($entry, 'link');
        }

        if (!$url_node) {
            $url_node = $entry->link;
        }

        $item->setUrl(XmlParser::getValue($url_node));
    }

    /**
     * Generate the item id from the item title, url and content.
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     * @param \PicoFeed\Parser\Feed $feed  Feed object
     */
    public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed)
    {
        $generated = $this->generateId($item->getTitle(), $item->getUrl(), $item->getContent());

        $item->setId($generated);
    }

    /**
     * Find the item enclosure (intentionally empty: nothing is set by this parser).
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     * @param \PicoFeed\Parser\Feed $feed  Feed object
     */
    public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
    {
    }

    /**
     * Find the item language; the feed language is used when the item has none.
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     * @param \PicoFeed\Parser\Feed $feed  Feed object
     */
    public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed)
    {
        $node = XmlParser::getXPathResult($entry, 'dc:language', $this->namespaces);
        $language = XmlParser::getValue($node);

        $item->setLanguage($language ?: $feed->getLanguage());
    }
}
|
289
vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss20.php
vendored
Normal file
289
vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss20.php
vendored
Normal file
|
@ -0,0 +1,289 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
use SimpleXMLElement;
|
||||
use PicoFeed\Filter\Filter;
|
||||
use PicoFeed\Client\Url;
|
||||
|
||||
/**
|
||||
* RSS 2.0 Parser.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class Rss20 extends Parser
{
    /**
     * Namespace prefixes (and their URIs) used by the XPath queries of this parser.
     */
    protected $namespaces = array(
        'dc' => 'http://purl.org/dc/elements/1.1/',
        'content' => 'http://purl.org/rss/1.0/modules/content/',
        'feedburner' => 'http://rssnamespace.org/feedburner/ext/1.0',
        'atom' => 'http://www.w3.org/2005/Atom',
    );

    /**
     * Locate the item nodes of the document (channel/item).
     *
     * @param SimpleXMLElement $xml Feed xml
     *
     * @return SimpleXMLElement
     */
    public function getItemsTree(SimpleXMLElement $xml)
    {
        $items = XmlParser::getXPathResult($xml, 'channel/item');

        return $items;
    }

    /**
     * Find the feed url (this parser always sets an empty string).
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedUrl(SimpleXMLElement $xml, Feed $feed)
    {
        $feed->setFeedUrl('');
    }

    /**
     * Find the site url from channel/link.
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findSiteUrl(SimpleXMLElement $xml, Feed $feed)
    {
        $feed->setSiteUrl(XmlParser::getValue(XmlParser::getXPathResult($xml, 'channel/link')));
    }

    /**
     * Find the feed description from channel/description.
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedDescription(SimpleXMLElement $xml, Feed $feed)
    {
        $feed->setDescription(XmlParser::getValue(XmlParser::getXPathResult($xml, 'channel/description')));
    }

    /**
     * Find the feed logo url from channel/image/url.
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedLogo(SimpleXMLElement $xml, Feed $feed)
    {
        $feed->setLogo(XmlParser::getValue(XmlParser::getXPathResult($xml, 'channel/image/url')));
    }

    /**
     * Find the feed icon (this parser always sets an empty string).
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedIcon(SimpleXMLElement $xml, Feed $feed)
    {
        $feed->setIcon('');
    }

    /**
     * Find the feed title; the site url is used when the title is empty.
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedTitle(SimpleXMLElement $xml, Feed $feed)
    {
        $node = XmlParser::getXPathResult($xml, 'channel/title');
        $clean_title = Filter::stripWhiteSpace(XmlParser::getValue($node));

        $feed->setTitle($clean_title ?: $feed->getSiteUrl());
    }

    /**
     * Find the feed language from channel/language.
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed)
    {
        $feed->setLanguage(XmlParser::getValue(XmlParser::getXPathResult($xml, 'channel/language')));
    }

    /**
     * Find the feed id: the feed url, or the site url when the former is empty.
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedId(SimpleXMLElement $xml, Feed $feed)
    {
        $identifier = $feed->getFeedUrl();

        if (!$identifier) {
            $identifier = $feed->getSiteUrl();
        }

        $feed->setId($identifier);
    }

    /**
     * Find the feed date from channel/pubDate and channel/lastBuildDate.
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedDate(SimpleXMLElement $xml, Feed $feed)
    {
        $pub_nodes = XmlParser::getXPathResult($xml, 'channel/pubDate');
        $build_nodes = XmlParser::getXPathResult($xml, 'channel/lastBuildDate');

        $published = null;
        if (!empty($pub_nodes)) {
            $published = $this->getDateParser()->getDateTime(XmlParser::getValue($pub_nodes));
        }

        $updated = null;
        if (!empty($build_nodes)) {
            $updated = $this->getDateParser()->getDateTime(XmlParser::getValue($build_nodes));
        }

        $has_published = $published !== null;
        $has_updated = $updated !== null;

        if ($has_published && $has_updated) {
            // We use the most recent date between published and updated
            $feed->setDate(max($published, $updated));
        } elseif (!$has_published && !$has_updated) {
            // We use the current date if there is no date for the feed
            $feed->setDate($this->getDateParser()->getCurrentDateTime());
        } else {
            // Only one of the two dates is available.
            $feed->setDate($updated ?: $published);
        }
    }

    /**
     * Find the item date; the feed date is used when the item has no pubDate.
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param Item                  $item  Item object
     * @param \PicoFeed\Parser\Feed $feed  Feed object
     */
    public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed)
    {
        $node = XmlParser::getXPathResult($entry, 'pubDate');

        if (empty($node)) {
            $item->setDate($feed->getDate());
        } else {
            $item->setDate($this->getDateParser()->getDateTime(XmlParser::getValue($node)));
        }
    }

    /**
     * Find the item title; the item url is used when the title is empty.
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     */
    public function findItemTitle(SimpleXMLElement $entry, Item $item)
    {
        $node = XmlParser::getXPathResult($entry, 'title');
        $clean_title = Filter::stripWhiteSpace(XmlParser::getValue($node));

        $item->setTitle($clean_title ?: $item->getUrl());
    }

    /**
     * Find the item author.
     *
     * Lookup order: item dc:creator, item author, channel dc:creator,
     * channel managingEditor.
     *
     * @param SimpleXMLElement      $xml   Feed
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     */
    public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item)
    {
        $author = XmlParser::getXPathResult($entry, 'dc:creator', $this->namespaces);

        if (!$author) {
            $author = XmlParser::getXPathResult($entry, 'author');
        }

        if (!$author) {
            $author = XmlParser::getXPathResult($xml, 'channel/dc:creator', $this->namespaces);
        }

        if (!$author) {
            $author = XmlParser::getXPathResult($xml, 'channel/managingEditor');
        }

        $item->setAuthor(XmlParser::getValue($author));
    }

    /**
     * Find the item content: content:encoded, else the item description.
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     */
    public function findItemContent(SimpleXMLElement $entry, Item $item)
    {
        $body = XmlParser::getXPathResult($entry, 'content:encoded', $this->namespaces);

        // The description is only looked up when content:encoded yields an empty value.
        if (XmlParser::getValue($body) === '') {
            $body = XmlParser::getXPathResult($entry, 'description');
        }

        $item->setContent(XmlParser::getValue($body));
    }

    /**
     * Find the item URL.
     *
     * Lookup order: feedburner:origLink, link, atom:link/@href. When none of
     * them matches, the guid is used, but only if it is a valid URL.
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     */
    public function findItemUrl(SimpleXMLElement $entry, Item $item)
    {
        $url_node = XmlParser::getXPathResult($entry, 'feedburner:origLink', $this->namespaces);

        if (!$url_node) {
            $url_node = XmlParser::getXPathResult($entry, 'link');
        }

        if (!$url_node) {
            $url_node = XmlParser::getXPathResult($entry, 'atom:link/@href', $this->namespaces);
        }

        if (!empty($url_node)) {
            $item->setUrl(XmlParser::getValue($url_node));

            return;
        }

        $guid = XmlParser::getValue(XmlParser::getXPathResult($entry, 'guid'));

        if (filter_var($guid, FILTER_VALIDATE_URL) !== false) {
            $item->setUrl($guid);
        }
    }

    /**
     * Generate the item id.
     *
     * The guid is hashed when it is present, otherwise the item title, url
     * and content are hashed together.
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     * @param \PicoFeed\Parser\Feed $feed  Feed object
     */
    public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed)
    {
        $guid = XmlParser::getValue(XmlParser::getXPathResult($entry, 'guid'));

        $generated = $guid
            ? $this->generateId($guid)
            : $this->generateId($item->getTitle(), $item->getUrl(), $item->getContent());

        $item->setId($generated);
    }

    /**
     * Find the item enclosure (url and type), if the item has one.
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     * @param \PicoFeed\Parser\Feed $feed  Feed object
     */
    public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
    {
        if (!isset($entry->enclosure)) {
            return;
        }

        $mime_type = XmlParser::getXPathResult($entry, 'enclosure/@type');

        // feedburner:origEnclosureLink is tried before the enclosure's own url.
        $location = XmlParser::getXPathResult($entry, 'feedburner:origEnclosureLink', $this->namespaces);

        if (!$location) {
            $location = XmlParser::getXPathResult($entry, 'enclosure/@url');
        }

        // The enclosure url is resolved against the site url of the feed.
        $item->setEnclosureUrl(Url::resolve(XmlParser::getValue($location), $feed->getSiteUrl()));
        $item->setEnclosureType(XmlParser::getValue($mime_type));
    }

    /**
     * Find the item language; the feed language is used when the item has none.
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     * @param \PicoFeed\Parser\Feed $feed  Feed object
     */
    public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed)
    {
        $node = XmlParser::getXPathResult($entry, 'dc:language', $this->namespaces);
        $language = XmlParser::getValue($node);

        $item->setLanguage($language ?: $feed->getLanguage());
    }
}
|
12
vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss91.php
vendored
Normal file
12
vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss91.php
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
/**
|
||||
* RSS 0.91 Parser.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class Rss91 extends Rss20
|
||||
{
|
||||
}
|
12
vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss92.php
vendored
Normal file
12
vendor/fguillot/picofeed/lib/PicoFeed/Parser/Rss92.php
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
/**
|
||||
* RSS 0.92 Parser.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class Rss92 extends Rss20
|
||||
{
|
||||
}
|
12
vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlEntityException.php
vendored
Normal file
12
vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlEntityException.php
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
/**
|
||||
* XmlEntityException Exception.
|
||||
*
|
||||
* @author Bernhard Posselt
|
||||
*/
|
||||
class XmlEntityException extends MalformedXmlException
|
||||
{
|
||||
}
|
236
vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php
vendored
Normal file
236
vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php
vendored
Normal file
|
@ -0,0 +1,236 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
use DomDocument;
|
||||
use SimpleXmlElement;
|
||||
|
||||
use ZendXml\Security;
|
||||
|
||||
/**
|
||||
* XML parser class.
|
||||
*
|
||||
* Checks for XML eXternal Entity (XXE) and XML Entity Expansion (XEE) attacks on XML documents
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class XmlParser
|
||||
{
|
||||
/**
 * Get a SimpleXmlElement instance or return false.
 *
 * Delegates to self::scan() without passing a DOM document.
 *
 * @static
 * @param  string $input XML content
 * @return mixed
 */
public static function getSimpleXml($input)
{
    $xml = self::scan($input);

    return $xml;
}
|
||||
|
||||
/**
 * Get a DomDocument instance or return false.
 *
 * False is returned for empty input and for a scanned document that has
 * no child node; otherwise the result of self::scan() is returned as-is.
 *
 * @static
 * @param  string $input XML content
 * @return \DOMDocument|false
 */
public static function getDomDocument($input)
{
    if (empty($input)) {
        return false;
    }

    $document = self::scan($input, new DOMDocument());

    // A document without any child node probably means parsing failed
    $isEmptyDocument = $document && $document->childNodes->length === 0;

    return $isEmptyDocument ? false : $document;
}
|
||||
|
||||
/**
 * Small wrapper around ZendXml that turns its runtime exceptions into
 * picoFeed exceptions.
 *
 * @param  string            $input the xml to load
 * @param  \DOMDocument|null $dom   pass in a dom document, or use null/omit
 *                                  if simpleXml should be used
 * @return mixed whatever ZendXml\Security::scan() returns
 * @throws XmlEntityException when ZendXml raises a RuntimeException
 */
private static function scan($input, $dom = null)
{
    try {
        $result = Security::scan($input, $dom);
    } catch (\ZendXml\Exception\RuntimeException $zendException) {
        throw new XmlEntityException($zendException->getMessage());
    }

    return $result;
}
|
||||
|
||||
/**
 * Load a HTML document into a DomDocument instance.
 *
 * A DomDocument is always returned: it is left empty when the input is
 * empty. libxml errors are switched to internal handling before loading.
 *
 * @static
 * @param  string $input HTML content
 * @return \DOMDocument
 */
public static function getHtmlDocument($input)
{
    $document = new DomDocument();

    if (!empty($input)) {
        libxml_use_internal_errors(true);

        // The options argument of loadHTML() is only passed on PHP >= 5.4.0
        if (version_compare(PHP_VERSION, '5.4.0', '<')) {
            $document->loadHTML($input);
        } else {
            $document->loadHTML($input, LIBXML_NONET);
        }
    }

    return $document;
}
|
||||
|
||||
/**
 * Convert a HTML document to XML.
 *
 * The HTML is loaded with an XML/UTF-8 prolog prepended and the first
 * <body> element is serialized back as XML.
 *
 * @static
 *
 * @param string $html HTML document
 *
 * @return string
 */
public static function htmlToXml($html)
{
    $document = self::getHtmlDocument('<?xml version="1.0" encoding="UTF-8">'.$html);
    $body = $document->getElementsByTagName('body')->item(0);

    return $document->saveXML($body);
}
|
||||
|
||||
/**
 * Get XML parser errors.
 *
 * Formats every entry of libxml_get_errors() and joins them with ", ".
 *
 * @static
 * @return string
 */
public static function getErrors()
{
    $messages = array_map(function ($error) {
        return sprintf('XML error: %s (Line: %d - Column: %d - Code: %d)',
            $error->message,
            $error->line,
            $error->column,
            $error->code
        );
    }, libxml_get_errors());

    return implode(', ', $messages);
}
|
||||
|
||||
/**
 * Get the encoding from a xml tag.
 *
 * Looks for the first encoding="..." (or encoding='...') attribute located
 * before the last "?>" of the input and returns its value in lowercase.
 * An empty string is returned when there is no "<?xml", no "?>", no
 * encoding attribute or no closing quote.
 *
 * @static
 * @param  string $data Input data
 * @return string
 */
public static function getEncodingFromXmlTag($data)
{
    if (strpos($data, '<?xml') === false) {
        return '';
    }

    $end = strrpos($data, '?>');

    if ($end === false) {
        return '';
    }

    // Normalize single quotes so only one quote style has to be searched
    $data = str_replace("'", '"', substr($data, 0, $end));

    $p1 = strpos($data, 'encoding=');

    if ($p1 === false) {
        return '';
    }

    // Skip "encoding=" (9 bytes) and the opening quote
    $start = $p1 + 10;

    // strpos() rejects an offset past the end of the string (ValueError on PHP 8)
    if ($start > strlen($data)) {
        return '';
    }

    $p2 = strpos($data, '"', $start);

    if ($p2 === false) {
        return '';
    }

    return strtolower(substr($data, $start, $p2 - $start));
}
|
||||
|
||||
/**
 * Get the charset from a meta tag.
 *
 * Returns the lowercased charset value of the first matching <meta> tag,
 * or an empty string when nothing matches.
 *
 * @static
 * @param  string $data Input data
 * @return string
 */
public static function getEncodingFromMetaTag($data)
{
    $found = preg_match('/<meta.*?charset\s*=\s*["\']?\s*([^"\'\s\/>;]+)/i', $data, $match);

    return $found === 1 ? strtolower($match[1]) : '';
}
|
||||
|
||||
/**
 * Rewrite XPath query to use namespace-uri and local-name derived from prefix.
 *
 * Every "prefix:name" pair of the query becomes
 * *[namespace-uri()="<uri>" and local-name()="<name>"], where <uri> is
 * looked up in $ns. Pairs using the "xml" prefix are kept unchanged.
 *
 * @param  string $query XPath query
 * @param  array  $ns    Prefix to namespace URI mapping
 * @return string
 */
public static function replaceXPathPrefixWithNamespaceURI($query, array $ns)
{
    $rewrite = function ($matches) use ($ns) {
        list($qualifiedName, $prefix, $localName) = $matches;

        // don't try to map the special prefix XML
        if (strtolower($prefix) === 'xml') {
            return $qualifiedName;
        }

        return '*[namespace-uri()="'.$ns[$prefix].'" and local-name()="'.$localName.'"]';
    };

    return preg_replace_callback('/([A-Z0-9]+):([A-Z0-9]+)/iu', $rewrite, $query);
}
|
||||
|
||||
/**
 * Get the result elements of a XPath query.
 *
 * When a prefix mapping is given, the query is first rewritten with
 * replaceXPathPrefixWithNamespaceURI(); the return value is whatever
 * SimpleXMLElement::xpath() returns for the final query.
 *
 * @param  \SimpleXMLElement $xml   XML element
 * @param  string            $query XPath query
 * @param  array             $ns    Prefix to namespace URI mapping
 * @return \SimpleXMLElement[]
 */
public static function getXPathResult(SimpleXMLElement $xml, $query, array $ns = array())
{
    $xpath = empty($ns) ? $query : static::replaceXPathPrefixWithNamespaceURI($query, $ns);

    return $xml->xpath($xpath);
}
|
||||
|
||||
/**
 * Get the first Xpath result or SimpleXMLElement value
 *
 * Accepts either a non-empty array (its element at index 0 is used) or a
 * single SimpleXMLElement. The value is cast to string and trimmed in both
 * cases; any other input yields an empty string.
 *
 * @static
 * @access public
 * @param  mixed $value
 * @return string
 */
public static function getValue($value)
{
    $result = '';

    if (is_array($value) && count($value) > 0) {
        $result = (string) $value[0];
    } elseif ($value instanceof \SimpleXMLElement) {
        // Fall through to the shared trim() so both input kinds are normalized alike
        $result = (string) $value;
    }

    return trim($result);
}
|
||||
}
|
14
vendor/fguillot/picofeed/lib/PicoFeed/PicoFeedException.php
vendored
Normal file
14
vendor/fguillot/picofeed/lib/PicoFeed/PicoFeedException.php
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed;
|
||||
|
||||
use Exception;
|
||||
|
||||
/**
|
||||
* PicoFeedException Exception.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
abstract class PicoFeedException extends Exception
|
||||
{
|
||||
}
|
37
vendor/fguillot/picofeed/lib/PicoFeed/Processor/ContentFilterProcessor.php
vendored
Normal file
37
vendor/fguillot/picofeed/lib/PicoFeed/Processor/ContentFilterProcessor.php
vendored
Normal file
|
@ -0,0 +1,37 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Processor;
|
||||
|
||||
use PicoFeed\Base;
|
||||
use PicoFeed\Filter\Filter;
|
||||
use PicoFeed\Logging\Logger;
|
||||
use PicoFeed\Parser\Feed;
|
||||
use PicoFeed\Parser\Item;
|
||||
|
||||
/**
 * Item Content Filter
 *
 * Replaces the item content with its filtered version when content
 * filtering is enabled in the configuration.
 *
 * @package PicoFeed\Processor
 * @author  Frederic Guillot
 */
class ContentFilterProcessor extends Base implements ItemProcessorInterface
{
    /**
     * Execute Item Processor
     *
     * @access public
     * @param  Feed $feed
     * @param  Item $item
     * @return bool Always false
     */
    public function execute(Feed $feed, Item $item)
    {
        if ($this->config->getContentFiltering(true)) {
            $filter = Filter::html($item->getContent(), $feed->getSiteUrl());
            $filter->setConfig($this->config);
            $item->setContent($filter->execute());
        } else {
            Logger::setMessage(get_called_class().': Content filtering disabled');
        }

        // Explicit boolean to honour the documented return type; callers
        // that test the result see the same falsy outcome as before
        return false;
    }
}
|
49
vendor/fguillot/picofeed/lib/PicoFeed/Processor/ContentGeneratorProcessor.php
vendored
Normal file
49
vendor/fguillot/picofeed/lib/PicoFeed/Processor/ContentGeneratorProcessor.php
vendored
Normal file
|
@ -0,0 +1,49 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Processor;
|
||||
|
||||
use PicoFeed\Base;
|
||||
use PicoFeed\Parser\Feed;
|
||||
use PicoFeed\Parser\Item;
|
||||
|
||||
/**
 * Item Content Generator
 *
 * Tries each configured content generator in turn until one of them
 * reports success.
 *
 * @package PicoFeed\Processor
 * @author  Frederic Guillot
 */
class ContentGeneratorProcessor extends Base implements ItemProcessorInterface
{
    /**
     * List of generators
     *
     * Each name is mapped to \PicoFeed\Generator\<Name>ContentGenerator.
     *
     * @access protected
     * @var array
     */
    protected $generators = array(
        'youtube',
        'file',
    );

    /**
     * Execute Item Processor
     *
     * @access public
     * @param  Feed $feed
     * @param  Item $item
     * @return bool True as soon as one generator returns a truthy result
     */
    public function execute(Feed $feed, Item $item)
    {
        foreach ($this->generators as $name) {
            $generatorClass = '\PicoFeed\Generator\\'.ucfirst($name).'ContentGenerator';
            $generator = new $generatorClass($this->config);

            if (!$generator->execute($item)) {
                continue;
            }

            return true;
        }

        return false;
    }
}
|
96
vendor/fguillot/picofeed/lib/PicoFeed/Processor/ItemPostProcessor.php
vendored
Normal file
96
vendor/fguillot/picofeed/lib/PicoFeed/Processor/ItemPostProcessor.php
vendored
Normal file
|
@ -0,0 +1,96 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Processor;
|
||||
|
||||
use PicoFeed\Base;
|
||||
use PicoFeed\Parser\Feed;
|
||||
use PicoFeed\Parser\Item;
|
||||
|
||||
/**
 * Item Post Processor
 *
 * Registry of item processors, keyed by their class name.
 *
 * @package PicoFeed\Processor
 * @author  Frederic Guillot
 */
class ItemPostProcessor extends Base
{
    /**
     * List of processors, indexed by class name
     *
     * @access private
     * @var array
     */
    private $processors = array();

    /**
     * Execute all processors
     *
     * Processors run in registration order; the loop stops at the first
     * one that returns a truthy value.
     *
     * @access public
     * @param  Feed $feed
     * @param  Item $item
     * @return bool
     */
    public function execute(Feed $feed, Item $item)
    {
        foreach ($this->processors as $registered) {
            $handled = $registered->execute($feed, $item);

            if ($handled) {
                return true;
            }
        }

        return false;
    }

    /**
     * Register a new Item post-processor
     *
     * A processor of the same class registered earlier is replaced.
     *
     * @access public
     * @param  ItemProcessorInterface $processor
     * @return ItemPostProcessor
     */
    public function register(ItemProcessorInterface $processor)
    {
        $key = get_class($processor);
        $this->processors[$key] = $processor;

        return $this;
    }

    /**
     * Remove Processor instance
     *
     * @access public
     * @param  string $class
     * @return ItemPostProcessor
     */
    public function unregister($class)
    {
        // unset() on a missing key is a no-op, so no prior check is needed
        unset($this->processors[$class]);

        return $this;
    }

    /**
     * Checks whether a specific processor is registered or not
     *
     * @access public
     * @param  string $class
     * @return bool
     */
    public function hasProcessor($class)
    {
        return isset($this->processors[$class]);
    }

    /**
     * Get Processor instance
     *
     * @access public
     * @param  string $class
     * @return ItemProcessorInterface|null
     */
    public function getProcessor($class)
    {
        if (!isset($this->processors[$class])) {
            return null;
        }

        return $this->processors[$class];
    }
}
|
25
vendor/fguillot/picofeed/lib/PicoFeed/Processor/ItemProcessorInterface.php
vendored
Normal file
25
vendor/fguillot/picofeed/lib/PicoFeed/Processor/ItemProcessorInterface.php
vendored
Normal file
|
@ -0,0 +1,25 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Processor;
|
||||
|
||||
use PicoFeed\Parser\Feed;
|
||||
use PicoFeed\Parser\Item;
|
||||
|
||||
/**
|
||||
* Item Processor Interface
|
||||
*
|
||||
* @package PicoFeed\Processor
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
interface ItemProcessorInterface
|
||||
{
|
||||
/**
|
||||
* Execute Item Processor
|
||||
*
|
||||
* @access public
|
||||
* @param Feed $feed
|
||||
* @param Item $item
|
||||
* @return bool
|
||||
*/
|
||||
public function execute(Feed $feed, Item $item);
|
||||
}
|
96
vendor/fguillot/picofeed/lib/PicoFeed/Processor/ScraperProcessor.php
vendored
Normal file
96
vendor/fguillot/picofeed/lib/PicoFeed/Processor/ScraperProcessor.php
vendored
Normal file
|
@ -0,0 +1,96 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Processor;
|
||||
|
||||
use Closure;
|
||||
use PicoFeed\Base;
|
||||
use PicoFeed\Parser\Feed;
|
||||
use PicoFeed\Parser\Item;
|
||||
use PicoFeed\Scraper\Scraper;
|
||||
|
||||
/**
 * Scraper Processor
 *
 * Runs the scraper on the item URL and, when relevant content is found,
 * replaces the item content with the scraper's filtered content.
 *
 * @package PicoFeed\Processor
 * @author  Frederic Guillot
 */
class ScraperProcessor extends Base implements ItemProcessorInterface
{
    /**
     * URLs that must not be scraped
     *
     * @var array
     */
    private $ignoredUrls = array();

    /**
     * Scraper instance, created on first use by getScraper()
     *
     * @var Scraper
     */
    private $scraper;

    /**
     * Callback function for each scraper execution
     *
     * @var Closure
     */
    private $executionCallback;

    /**
     * Add a new execution callback
     *
     * @access public
     * @param  Closure $executionCallback
     * @return $this
     */
    public function setExecutionCallback(Closure $executionCallback)
    {
        $this->executionCallback = $executionCallback;

        return $this;
    }

    /**
     * Execute Item Processor
     *
     * @access public
     * @param  Feed $feed
     * @param  Item $item
     * @return bool Always false
     */
    public function execute(Feed $feed, Item $item)
    {
        if (in_array($item->getUrl(), $this->ignoredUrls)) {
            return false;
        }

        $scraper = $this->getScraper();
        $scraper->setUrl($item->getUrl());
        $scraper->execute();

        // The callback is invoked after every scraper run, before the content check
        if ($this->executionCallback && is_callable($this->executionCallback)) {
            call_user_func($this->executionCallback, $feed, $item, $scraper);
        }

        if ($scraper->hasRelevantContent()) {
            $item->setContent($scraper->getFilteredContent());
        }

        return false;
    }

    /**
     * Ignore list of URLs
     *
     * Replaces any list set previously.
     *
     * @access public
     * @param  array $urls
     * @return $this
     */
    public function ignoreUrls(array $urls)
    {
        $this->ignoredUrls = $urls;

        return $this;
    }

    /**
     * Returns Scraper instance
     *
     * The instance is created on the first call and reused afterwards.
     *
     * @access public
     * @return Scraper
     */
    public function getScraper()
    {
        if ($this->scraper !== null) {
            return $this->scraper;
        }

        $this->scraper = new Scraper($this->config);

        return $this->scraper;
    }
}
|
190
vendor/fguillot/picofeed/lib/PicoFeed/Reader/Favicon.php
vendored
Normal file
190
vendor/fguillot/picofeed/lib/PicoFeed/Reader/Favicon.php
vendored
Normal file
|
@ -0,0 +1,190 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Reader;
|
||||
|
||||
use DOMXPath;
|
||||
use PicoFeed\Base;
|
||||
use PicoFeed\Client\Client;
|
||||
use PicoFeed\Client\ClientException;
|
||||
use PicoFeed\Client\Url;
|
||||
use PicoFeed\Logging\Logger;
|
||||
use PicoFeed\Parser\XmlParser;
|
||||
|
||||
/**
 * Favicon class.
 *
 * Locates and downloads the icon of a website.
 *
 * https://en.wikipedia.org/wiki/Favicon
 *
 * @author  Frederic Guillot
 */
class Favicon extends Base
{
    /**
     * Valid types for favicon (supported by browsers).
     *
     * @var array
     */
    private $types = array(
        'image/png',
        'image/gif',
        'image/x-icon',
        'image/jpeg',
        'image/jpg',
        'image/svg+xml'
    );

    /**
     * Icon binary content.
     *
     * @var string
     */
    private $content = '';

    /**
     * Icon content type.
     *
     * @var string
     */
    private $content_type = '';

    /**
     * Get the icon file content (available only after the download).
     *
     * @return string
     */
    public function getContent()
    {
        return $this->content;
    }

    /**
     * Get the icon file type (available only after the download).
     *
     * Returns the first supported type that the downloaded content type
     * starts with, or "image/x-icon" when none matches.
     *
     * @return string
     */
    public function getType()
    {
        foreach ($this->types as $supportedType) {
            $isPrefix = strpos($this->content_type, $supportedType) === 0;

            if ($isPrefix) {
                return $supportedType;
            }
        }

        return 'image/x-icon';
    }

    /**
     * Get data URI (http://en.wikipedia.org/wiki/Data_URI_scheme).
     *
     * @return string Empty string when no icon content is available
     */
    public function getDataUri()
    {
        if (empty($this->content)) {
            return '';
        }

        $mimeType = $this->getType();
        $payload = base64_encode($this->content);

        return sprintf('data:%s;base64,%s', $mimeType, $payload);
    }

    /**
     * Download and check if a resource exists.
     *
     * A ClientException raised by the download is logged and swallowed;
     * the client is returned in either case.
     *
     * @param string $url URL
     *
     * @return \PicoFeed\Client\Client Client instance
     */
    public function download($url)
    {
        $client = Client::getInstance();
        $client->setConfig($this->config);

        Logger::setMessage(get_called_class().' Download => '.$url);

        try {
            $client->execute($url);
        } catch (ClientException $exception) {
            Logger::setMessage(get_called_class().' Download Failed => '.$exception->getMessage());
        }

        return $client;
    }

    /**
     * Check if a remote file exists.
     *
     * @param string $url URL
     *
     * @return bool True when the downloaded content is not an empty string
     */
    public function exists($url)
    {
        $client = $this->download($url);

        return $client->getContent() !== '';
    }

    /**
     * Get the icon link for a website.
     *
     * With an explicit $favicon_link only that link is tried; if it yields
     * no content the search restarts without it. Otherwise the candidates
     * are the icon links found in the site's base page followed by
     * /favicon.ico.
     *
     * @param string $website_link URL
     * @param string $favicon_link optional URL
     *
     * @return string Link of the downloaded icon, or an empty string
     */
    public function find($website_link, $favicon_link = '')
    {
        $website = new Url($website_link);
        $hasExplicitLink = $favicon_link !== '';

        if ($hasExplicitLink) {
            $candidates = array($favicon_link);
        } else {
            $homepage = $this->download($website->getBaseUrl('/'))->getContent();
            $candidates = $this->extract($homepage);
            $candidates[] = $website->getBaseUrl('/favicon.ico');
        }

        foreach ($candidates as $candidate) {
            $candidate = Url::resolve($candidate, $website);
            $response = $this->download($candidate);

            $this->content = $response->getContent();
            $this->content_type = $response->getContentType();

            if ($this->content !== '') {
                return $candidate;
            }

            if ($hasExplicitLink) {
                // The supplied icon gave no content: retry with discovery
                return $this->find($website_link);
            }
        }

        return '';
    }

    /**
     * Extract the icon links from the HTML.
     *
     * @param string $html HTML
     *
     * @return array href attribute of every matching <link> element
     */
    public function extract($html)
    {
        if (empty($html)) {
            return array();
        }

        $document = XmlParser::getHtmlDocument($html);
        $xpath = new DOMXpath($document);
        $linkNodes = $xpath->query('//link[@rel="icon" or @rel="shortcut icon" or @rel="icon shortcut"]');

        $icons = array();

        foreach ($linkNodes as $linkNode) {
            $icons[] = $linkNode->getAttribute('href');
        }

        return $icons;
    }
}
|
190
vendor/fguillot/picofeed/lib/PicoFeed/Reader/Reader.php
vendored
Normal file
190
vendor/fguillot/picofeed/lib/PicoFeed/Reader/Reader.php
vendored
Normal file
|
@ -0,0 +1,190 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Reader;
|
||||
|
||||
use DOMXPath;
|
||||
use PicoFeed\Base;
|
||||
use PicoFeed\Client\Client;
|
||||
use PicoFeed\Client\Url;
|
||||
use PicoFeed\Logging\Logger;
|
||||
use PicoFeed\Parser\XmlParser;
|
||||
|
||||
/**
|
||||
* Reader class.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class Reader extends Base
|
||||
{
|
||||
/**
|
||||
* Feed formats for detection.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $formats = array(
|
||||
'Atom' => '//feed',
|
||||
'Rss20' => '//rss[@version="2.0"]',
|
||||
'Rss92' => '//rss[@version="0.92"]',
|
||||
'Rss91' => '//rss[@version="0.91"]',
|
||||
'Rss10' => '//rdf',
|
||||
);
|
||||
|
||||
/**
 * Download a feed (no discovery).
 *
 * The URL goes through prependScheme() before the request is executed.
 *
 * @param string $url           Feed url
 * @param string $last_modified Last modified HTTP header
 * @param string $etag          Etag HTTP header
 * @param string $username      HTTP basic auth username
 * @param string $password      HTTP basic auth password
 *
 * @return \PicoFeed\Client\Client
 */
public function download($url, $last_modified = '', $etag = '', $username = '', $password = '')
{
    $feedUrl = $this->prependScheme($url);

    $client = Client::getInstance()
        ->setConfig($this->config)
        ->setLastModified($last_modified)
        ->setEtag($etag)
        ->setUsername($username)
        ->setPassword($password);

    return $client->execute($feedUrl);
}
|
||||
|
||||
/**
 * Discover and download a feed.
 *
 * The first download is returned directly when the resource was not
 * modified or when its content has a detectable feed format. Otherwise
 * the content is searched for feed links and the first one is downloaded.
 *
 * @param string $url           Feed or website url
 * @param string $last_modified Last modified HTTP header
 * @param string $etag          Etag HTTP header
 * @param string $username      HTTP basic auth username
 * @param string $password      HTTP basic auth password
 *
 * @return \PicoFeed\Client\Client
 *
 * @throws SubscriptionNotFoundException when no feed link is found
 */
public function discover($url, $last_modified = '', $etag = '', $username = '', $password = '')
{
    $client = $this->download($url, $last_modified, $etag, $username, $password);

    // It's already a feed or the feed was not modified
    $isUsable = !$client->isModified() || $this->detectFormat($client->getContent());

    if ($isUsable) {
        return $client;
    }

    // Try to find a subscription
    $feedLinks = $this->find($client->getUrl(), $client->getContent());

    if (empty($feedLinks)) {
        throw new SubscriptionNotFoundException('Unable to find a subscription');
    }

    return $this->download($feedLinks[0], $last_modified, $etag, $username, $password);
}
|
||||
|
||||
/**
 * Find feed urls inside a HTML document.
 *
 * Collects the href of every <link> typed application/rss+xml, then of
 * every <link> typed application/atom+xml. Relative links are made
 * absolute against the base URL of the website.
 *
 * @param string $url  Website url
 * @param string $html HTML content
 *
 * @return array List of feed links
 */
public function find($url, $html)
{
    Logger::setMessage(get_called_class().': Try to discover subscriptions');

    $document = XmlParser::getHtmlDocument($html);
    $xpath = new DOMXPath($document);
    $feedLinks = array();

    $queries = array(
        '//link[@type="application/rss+xml"]',
        '//link[@type="application/atom+xml"]',
    );

    foreach ($queries as $query) {
        foreach ($xpath->query($query) as $linkNode) {
            $href = $linkNode->getAttribute('href');

            if (empty($href)) {
                continue;
            }

            $feedUrl = new Url($href);
            $siteUrl = new Url($url);

            // Only relative links need the website base URL
            $base = $feedUrl->isRelativeUrl() ? $siteUrl->getBaseUrl() : '';
            $feedLinks[] = $feedUrl->getAbsoluteUrl($base);
        }
    }

    Logger::setMessage(get_called_class().': '.implode(', ', $feedLinks));

    return $feedLinks;
}
|
||||
|
||||
/**
 * Get a parser instance.
 *
 * The parser class is \PicoFeed\Parser\<format>, where <format> is the
 * name returned by detectFormat() for the content.
 *
 * @param string $url      Site url
 * @param string $content  Feed content
 * @param string $encoding HTTP encoding
 *
 * @return \PicoFeed\Parser\Parser
 *
 * @throws UnsupportedFeedFormatException when no format is detected
 */
public function getParser($url, $content, $encoding)
{
    $detectedFormat = $this->detectFormat($content);

    if (empty($detectedFormat)) {
        throw new UnsupportedFeedFormatException('Unable to detect feed format');
    }

    $parserClass = '\PicoFeed\Parser\\'.$detectedFormat;
    $feedParser = new $parserClass($content, $encoding, $url);

    $feedParser->setHashAlgo($this->config->getParserHashAlgo());
    $feedParser->setConfig($this->config);

    return $feedParser;
}
|
||||
|
||||
/**
 * Detect the feed format.
 *
 * Each entry of $this->formats is tried in order; the first one whose
 * XPath query matches exactly one node gives the format name.
 *
 * @param string $content Feed content
 *
 * @return string Parser name, or an empty string when nothing matches
 */
public function detectFormat($content)
{
    $document = XmlParser::getHtmlDocument($content);
    $xpath = new DOMXPath($document);

    foreach ($this->formats as $parser_name => $query) {
        $matches = $xpath->query($query);

        if ($matches->length !== 1) {
            continue;
        }

        return $parser_name;
    }

    return '';
}
|
||||
|
||||
/**
 * Add the prefix "http://" if the end-user just enter a domain name.
 *
 * The scheme check is case-insensitive, so URLs such as
 * "HTTP://example.org" are returned unchanged instead of being prefixed
 * a second time.
 *
 * @param string $url Url
 * @return string
 */
public function prependScheme($url)
{
    if (!preg_match('%^https?://%i', $url)) {
        $url = 'http://'.$url;
    }

    return $url;
}
|
||||
}
|
14
vendor/fguillot/picofeed/lib/PicoFeed/Reader/ReaderException.php
vendored
Normal file
14
vendor/fguillot/picofeed/lib/PicoFeed/Reader/ReaderException.php
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Reader;
|
||||
|
||||
use PicoFeed\PicoFeedException;
|
||||
|
||||
/**
|
||||
* ReaderException Exception.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
abstract class ReaderException extends PicoFeedException
|
||||
{
|
||||
}
|
12
vendor/fguillot/picofeed/lib/PicoFeed/Reader/SubscriptionNotFoundException.php
vendored
Normal file
12
vendor/fguillot/picofeed/lib/PicoFeed/Reader/SubscriptionNotFoundException.php
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Reader;
|
||||
|
||||
/**
|
||||
* SubscriptionNotFoundException Exception.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class SubscriptionNotFoundException extends ReaderException
|
||||
{
|
||||
}
|
12
vendor/fguillot/picofeed/lib/PicoFeed/Reader/UnsupportedFeedFormatException.php
vendored
Normal file
12
vendor/fguillot/picofeed/lib/PicoFeed/Reader/UnsupportedFeedFormatException.php
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Reader;
|
||||
|
||||
/**
|
||||
* UnsupportedFeedFormatException Exception.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
*/
|
||||
class UnsupportedFeedFormatException extends ReaderException
|
||||
{
|
||||
}
|
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blog.lemonde.fr.php
vendored
Normal file
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blog.lemonde.fr.php
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://combat.blog.lemonde.fr/2013/08/31/teddy-riner-le-rookie-devenu-rambo/#xtor=RSS-3208',
|
||||
'body' => array(
|
||||
'//div[@class="entry-content"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//*[contains(@class, "fb-like") or contains(@class, "social")]'
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
15
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blogs.nytimes.com.php
vendored
Normal file
15
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blogs.nytimes.com.php
vendored
Normal file
|
@ -0,0 +1,15 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'title' => '//header/h1',
|
||||
'test_url' => 'http://bits.blogs.nytimes.com/2012/01/16/wikipedia-plans-to-go-dark-on-wednesday-to-protest-sopa/',
|
||||
'body' => array(
|
||||
'//div[@class="postContent"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//*[@class="shareToolsBox"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
13
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.igen.fr.php
vendored
Normal file
13
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.igen.fr.php
vendored
Normal file
|
@ -0,0 +1,13 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.igen.fr/ailleurs/2014/05/nvidia-va-delaisser-les-smartphones-grand-public-86031',
|
||||
'body' => array(
|
||||
'//div[contains(@class, "field-name-body")]'
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
11
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.nytimes.com.php
vendored
Normal file
11
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.nytimes.com.php
vendored
Normal file
|
@ -0,0 +1,11 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.nytimes.com/2011/05/15/world/middleeast/15prince.html',
|
||||
'body' => array(
|
||||
'//div[@class="articleBody"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
11
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.over-blog.com.php
vendored
Normal file
11
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.over-blog.com.php
vendored
Normal file
|
@ -0,0 +1,11 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://eliascarpe.over-blog.com/2015/12/re-upload-projets-d-avenir.html',
|
||||
'body' => array(
|
||||
'//div[contains(concat(" ", normalize-space(@class), " "), " ob-section ")]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
12
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.phoronix.com.php
vendored
Normal file
12
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.phoronix.com.php
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.phoronix.com/scan.php?page=article&item=amazon_ec2_bare&num=1',
|
||||
'body' => array(
|
||||
'//div[@class="content"]',
|
||||
),
|
||||
'strip' => array()
|
||||
)
|
||||
)
|
||||
);
|
20
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.slate.com.php
vendored
Normal file
20
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.slate.com.php
vendored
Normal file
|
@ -0,0 +1,20 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.slate.com/articles/business/moneybox/2013/08/microsoft_ceo_steve_ballmer_retires_a_firsthand_account_of_the_company_s.html',
|
||||
'body' => array(
|
||||
'//div[@class="sl-art-body"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//*[contains(@class, "social") or contains(@class, "comments") or contains(@class, "sl-article-floatin-tools") or contains(@class, "sl-art-pag")]',
|
||||
'//*[@id="mys_slate_logged_in"]',
|
||||
'//*[@id="sl_article_tools_myslate_bottom"]',
|
||||
'//*[@id="mys_myslate"]',
|
||||
'//*[@class="sl-viral-container"]',
|
||||
'//*[@class="sl-art-creds-cntr"]',
|
||||
'//*[@class="sl-art-ad-midflex"]',
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.theguardian.com.php
vendored
Normal file
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.theguardian.com.php
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.theguardian.com/sustainable-business/2015/feb/02/2015-hyper-transparency-global-business',
|
||||
'body' => array(
|
||||
'//div[contains(@class, "content__main-column--article")]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//div[contains(@class, "meta-container")]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
29
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wikipedia.org.php
vendored
Normal file
29
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wikipedia.org.php
vendored
Normal file
|
@ -0,0 +1,29 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'https://en.wikipedia.org/wiki/Grace_Hopper',
|
||||
'body' => array(
|
||||
'//div[@id="bodyContent"]',
|
||||
),
|
||||
'strip' => array(
|
||||
"//div[@id='toc']",
|
||||
"//div[@id='catlinks']",
|
||||
"//div[@id='jump-to-nav']",
|
||||
"//div[@class='thumbcaption']//div[@class='magnify']",
|
||||
"//table[@class='navbox']",
|
||||
"//table[contains(@class, 'infobox')]",
|
||||
"//div[@class='dablink']",
|
||||
"//div[@id='contentSub']",
|
||||
"//div[@id='siteSub']",
|
||||
"//table[@id='persondata']",
|
||||
"//table[contains(@class, 'metadata')]",
|
||||
"//*[contains(@class, 'noprint')]",
|
||||
"//*[contains(@class, 'printfooter')]",
|
||||
"//*[contains(@class, 'editsection')]",
|
||||
"//*[contains(@class, 'error')]",
|
||||
"//span[@title='pronunciation:']",
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
31
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wired.com.php
vendored
Normal file
31
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wired.com.php
vendored
Normal file
|
@ -0,0 +1,31 @@
|
|||
<?php
/*
 * Rule file for wired.com.
 *
 * Returns an array with a 'grabber' section keyed by a URL regex; each entry
 * carries a sample 'test_url', a list of 'body' XPath expressions and a list
 * of 'strip' XPath expressions.
 * NOTE(review): presumably 'body' selects the article content and 'strip'
 * removes nodes from it - confirm against the PicoFeed scraper.
 */
return array(
    'grabber' => array(
        '%.*%' => array(
            'test_url' => 'http://www.wired.com/gamelife/2013/09/ouya-free-the-games/',
            'body' => array(
                '//div[@data-js="gallerySlides"]',
                '//article',
            ),
            'strip' => array(
                '//*[@id="linker_widget"]',
                '//*[@class="credit"]',
                '//div[@data-js="slideCount"]',
                // contains() needs two arguments (haystack, needle); the former
                // contains(@class="visually-hidden") was not a valid XPath expression.
                '//*[contains(@class, "visually-hidden")]',
                '//*[@data-slide-number="_endslate"]',
                '//*[@id="related"]',
                '//*[contains(@class, "bio")]',
                '//*[contains(@class, "entry-footer")]',
                '//*[contains(@class, "mobify_backtotop_link")]',
                '//*[contains(@class, "gallery-navigation")]',
                '//*[contains(@class, "gallery-thumbnail")]',
                '//img[contains(@src, "1x1")]',
                '//a[contains(@href, "creativecommons")]',
                '//a[@href="#start-of-content"]',
                '//ul[@id="article-tags"]',
            ),
        )
    )
);
|
||||
|
||||
|
15
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wsj.com.php
vendored
Normal file
15
vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wsj.com.php
vendored
Normal file
|
@ -0,0 +1,15 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://online.wsj.com/article/SB10001424127887324108204579023143974408428.html',
|
||||
'body' => array(
|
||||
'//div[@class="articlePage"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//*[@id="articleThumbnail_2"]',
|
||||
'//*[@class="socialByline"]',
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
19
vendor/fguillot/picofeed/lib/PicoFeed/Rules/01net.com.php
vendored
Normal file
19
vendor/fguillot/picofeed/lib/PicoFeed/Rules/01net.com.php
vendored
Normal file
|
@ -0,0 +1,19 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.01net.com/editorial/624550/twitter-rachete-madbits-un-specialiste-francais-de-lanalyse-dimages/',
|
||||
'body' => array(
|
||||
'//div[@class="article_ventre_box"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//link',
|
||||
'//*[contains(@class, "article_navigation")]',
|
||||
'//h1',
|
||||
'//*[contains(@class, "article_toolbarMain")]',
|
||||
'//*[contains(@class, "article_imagehaute_box")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
9
vendor/fguillot/picofeed/lib/PicoFeed/Rules/abstrusegoose.com.php
vendored
Normal file
9
vendor/fguillot/picofeed/lib/PicoFeed/Rules/abstrusegoose.com.php
vendored
Normal file
|
@ -0,0 +1,9 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'filter' => array(
|
||||
'%.*%' => array(
|
||||
'%alt="(.+)" title="(.+)" */>%' => '/><br/>$1<br/>$2',
|
||||
),
|
||||
),
|
||||
);
|
15
vendor/fguillot/picofeed/lib/PicoFeed/Rules/alainonline.net.php
vendored
Normal file
15
vendor/fguillot/picofeed/lib/PicoFeed/Rules/alainonline.net.php
vendored
Normal file
|
@ -0,0 +1,15 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.alainonline.net/news_details.php?lang=arabic&sid=18907',
|
||||
'body' => array(
|
||||
'//div[@class="news_details"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//div[@class="news_details"]/div/div[last()]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
22
vendor/fguillot/picofeed/lib/PicoFeed/Rules/aljazeera.com.php
vendored
Normal file
22
vendor/fguillot/picofeed/lib/PicoFeed/Rules/aljazeera.com.php
vendored
Normal file
|
@ -0,0 +1,22 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.aljazeera.com/news/2015/09/xi-jinping-seattle-china-150922230118373.html',
|
||||
'body' => array(
|
||||
'//figure[@class="article-content"]',
|
||||
'//div[@class="article-body"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//h1',
|
||||
'//h3',
|
||||
'//ul',
|
||||
'//table[contains(@class, "in-article-item")]',
|
||||
'//a[@target="_self"]',
|
||||
'//div[@data-embed-type="Brightcove"]',
|
||||
'//div[@class="QuoteContainer"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
20
vendor/fguillot/picofeed/lib/PicoFeed/Rules/allafrica.com.php
vendored
Normal file
20
vendor/fguillot/picofeed/lib/PicoFeed/Rules/allafrica.com.php
vendored
Normal file
|
@ -0,0 +1,20 @@
|
|||
<?php
|
||||
|
||||
// Rule file for allafrica.com: returns a 'grabber' array keyed by a URL regex,
// with a sample 'test_url' plus 'body' and 'strip' XPath expression lists.
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
// NOTE(review): this URL points at aljazeera.com, not allafrica.com, and is
// identical to the test_url in aljazeera.com.php - looks like a copy/paste
// leftover; replace with a real allafrica.com article URL once confirmed.
'test_url' => 'http://www.aljazeera.com/news/2015/09/xi-jinping-seattle-china-150922230118373.html',
|
||||
'body' => array(
|
||||
'//div[@class="story-body"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//p[@class="kindofstory"]',
|
||||
'//cite[@class="byline"]',
|
||||
'//div[contains(@class,"related-topics")]',
|
||||
// NOTE(review): the next three expressions match elements literally named
// <links>, <sharebar> and <related-topics>; possibly class/id selectors were
// intended - verify against the site's markup.
'//links',
|
||||
'//sharebar',
|
||||
'//related-topics',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
24
vendor/fguillot/picofeed/lib/PicoFeed/Rules/allgemeine-zeitung.de.php
vendored
Normal file
24
vendor/fguillot/picofeed/lib/PicoFeed/Rules/allgemeine-zeitung.de.php
vendored
Normal file
|
@ -0,0 +1,24 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.allgemeine-zeitung.de/lokales/polizei/mainz-gonsenheim-unbekannte-rauben-esso-tankstelle-in-kurt-schumacher-strasse-aus_14913147.htm',
|
||||
'body' => array(
|
||||
'//div[contains(@class, "article")][1]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//read/h1',
|
||||
'//*[@id="t-map"]',
|
||||
'//*[contains(@class, "modules")]',
|
||||
'//*[contains(@class, "adsense")]',
|
||||
'//*[contains(@class, "linkbox")]',
|
||||
'//*[contains(@class, "info")]',
|
||||
'//*[@class="skip"]',
|
||||
'//*[@class="funcs"]',
|
||||
'//span[@class="nd address"]',
|
||||
'//a[contains(@href, "abo-und-services")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
9
vendor/fguillot/picofeed/lib/PicoFeed/Rules/amazingsuperpowers.com.php
vendored
Normal file
9
vendor/fguillot/picofeed/lib/PicoFeed/Rules/amazingsuperpowers.com.php
vendored
Normal file
|
@ -0,0 +1,9 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'filter' => array(
|
||||
'%.*%' => array(
|
||||
'%title="(.+)" */>%' => '/><br/>$1',
|
||||
),
|
||||
),
|
||||
);
|
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/anythingcomic.com.php
vendored
Normal file
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/anythingcomic.com.php
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'body' => array(
|
||||
'//img[@id="comic_image"]',
|
||||
'//div[@class="comment-wrapper"][position()=1]',
|
||||
),
|
||||
'strip' => array(),
|
||||
'test_url' => 'http://www.anythingcomic.com/comics/2108929/stress-free/',
|
||||
),
|
||||
),
|
||||
);
|
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/ap.org.php
vendored
Normal file
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/ap.org.php
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://hosted.ap.org/dynamic/stories/A/AS_CHINA_GAO_ZHISHENG?SITE=AP&SECTION=HOME&TEMPLATE=DEFAULT',
|
||||
'body' => array(
|
||||
'//img[@class="ap-smallphoto-img"]',
|
||||
'//span[@class="entry-content"]',
|
||||
),
|
||||
'strip' => array(),
|
||||
),
|
||||
),
|
||||
);
|
11
vendor/fguillot/picofeed/lib/PicoFeed/Rules/areadvd.de.php
vendored
Normal file
11
vendor/fguillot/picofeed/lib/PicoFeed/Rules/areadvd.de.php
vendored
Normal file
|
@ -0,0 +1,11 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.areadvd.de/news/daily-deals-angebote-bei-lautsprecher-teufel-3/',
|
||||
'body' => array('//div[contains(@class,"entry")]'),
|
||||
'strip' => array(),
|
||||
),
|
||||
),
|
||||
);
|
23
vendor/fguillot/picofeed/lib/PicoFeed/Rules/arstechnica.com.php
vendored
Normal file
23
vendor/fguillot/picofeed/lib/PicoFeed/Rules/arstechnica.com.php
vendored
Normal file
|
@ -0,0 +1,23 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://arstechnica.com/tech-policy/2015/09/judge-warners-2m-happy-birthday-copyright-is-bogus/',
|
||||
'body' => array(
|
||||
'//header/h2',
|
||||
'//section[@id="article-guts"]',
|
||||
'//div[@class="superscroll-content show"]',
|
||||
'//div[@class="gallery"]',
|
||||
),
|
||||
'next_page' => '//span[@class="numbers"]/a',
|
||||
'strip' => array(
|
||||
'//figcaption',
|
||||
'//div[@class="post-meta"]',
|
||||
'//div[@class="gallery-image-credit"]',
|
||||
'//aside',
|
||||
'//div[@class="article-expander"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
11
vendor/fguillot/picofeed/lib/PicoFeed/Rules/awkwardzombie.com.php
vendored
Normal file
11
vendor/fguillot/picofeed/lib/PicoFeed/Rules/awkwardzombie.com.php
vendored
Normal file
|
@ -0,0 +1,11 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%/index.php.*comic=.*%' => array(
|
||||
'test_url' => 'http://www.awkwardzombie.com/index.php?comic=041315',
|
||||
'body' => array('//*[@id="comic"]/img'),
|
||||
'strip' => array(),
|
||||
),
|
||||
),
|
||||
);
|
21
vendor/fguillot/picofeed/lib/PicoFeed/Rules/bangkokpost.com.php
vendored
Normal file
21
vendor/fguillot/picofeed/lib/PicoFeed/Rules/bangkokpost.com.php
vendored
Normal file
|
@ -0,0 +1,21 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.bangkokpost.com/news/politics/704204/new-us-ambassador-arrives-in-bangkok',
|
||||
'body' => array(
|
||||
'//div[@class="articleContents"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//h2',
|
||||
'//h4',
|
||||
'//div[@class="text-size"]',
|
||||
'//div[@class="relate-story"]',
|
||||
'//div[@class="text-ads"]',
|
||||
'//script',
|
||||
'//ul',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
16
vendor/fguillot/picofeed/lib/PicoFeed/Rules/bgr.com.php
vendored
Normal file
16
vendor/fguillot/picofeed/lib/PicoFeed/Rules/bgr.com.php
vendored
Normal file
|
@ -0,0 +1,16 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://bgr.com/2015/09/27/iphone-6s-waterproof-testing/',
|
||||
'body' => array(
|
||||
'//img[contains(@class,"img")]',
|
||||
'//div[@class="text-column"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//strong',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
9
vendor/fguillot/picofeed/lib/PicoFeed/Rules/bigfootjustice.com.php
vendored
Normal file
9
vendor/fguillot/picofeed/lib/PicoFeed/Rules/bigfootjustice.com.php
vendored
Normal file
|
@ -0,0 +1,9 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'filter' => array(
|
||||
'%.*%' => array(
|
||||
'%-150x150%' => '',
|
||||
),
|
||||
),
|
||||
);
|
13
vendor/fguillot/picofeed/lib/PicoFeed/Rules/bizjournals.com.php
vendored
Normal file
13
vendor/fguillot/picofeed/lib/PicoFeed/Rules/bizjournals.com.php
vendored
Normal file
|
@ -0,0 +1,13 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.bizjournals.com/milwaukee/news/2015/09/30/bucks-will-hike-prices-on-best-seats-at-new-arena.html',
|
||||
'body' => array(
|
||||
'//figure/div/a/img',
|
||||
'//p[@class="content__segment"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/blog.fefe.de.php
vendored
Normal file
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/blog.fefe.de.php
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://blog.fefe.de/?ts=ad706a73',
|
||||
'body' => array(
|
||||
'/html/body/ul',
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
12
vendor/fguillot/picofeed/lib/PicoFeed/Rules/blog.mapillary.com.php
vendored
Normal file
12
vendor/fguillot/picofeed/lib/PicoFeed/Rules/blog.mapillary.com.php
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://blog.mapillary.com/update/2015/08/26/traffic-sign-updates.html',
|
||||
'body' => array(
|
||||
'//div[contains(@class, "blog-post__content")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
18
vendor/fguillot/picofeed/lib/PicoFeed/Rules/buenosairesherald.com.php
vendored
Normal file
18
vendor/fguillot/picofeed/lib/PicoFeed/Rules/buenosairesherald.com.php
vendored
Normal file
|
@ -0,0 +1,18 @@
|
|||
<?php

/*
 * Rule file for buenosairesherald.com.
 *
 * Returns an array with a 'grabber' section keyed by a URL regex; the entry
 * carries a sample 'test_url', a list of 'body' XPath expressions and a list
 * of 'strip' XPath expressions.
 */
return array(
    'grabber' => array(
        '%.*%' => array(
            'test_url' => 'http://www.buenosairesherald.com/article/199344/manzur-named-next-governor-of-tucum%C3%A1n',
            'body' => array(
                '//div[@style="float:none"]',
            ),
            // NOTE(review): the "bz_*" class/id names below look like Bugzilla
            // markup rather than a news site - confirm these rules belong here.
            'strip' => array(
                // The closing parenthesis of contains() was missing, which made
                // the whole expression invalid XPath.
                '//div[contains(@class, "bz_alias_short_desc_container")]',
                '//td[@id="bz_show_bug_column_1"]',
                '//table[@id="attachment_table"]',
                '//table[@class="bz_comment_table"]',
            ),
        ),
    ),
);
|
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/bunicomic.com.php
vendored
Normal file
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/bunicomic.com.php
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.bunicomic.com/comic/buni-623/',
|
||||
'body' => array(
|
||||
'//div[@class="comic-table"]',
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/buttersafe.com.php
vendored
Normal file
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/buttersafe.com.php
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://buttersafe.com/2015/04/21/the-incredible-flexible-man/',
|
||||
'body' => array(
|
||||
'//div[@id="comic"]',
|
||||
'//div[@class="post-comic"]',
|
||||
),
|
||||
'strip' => array(),
|
||||
),
|
||||
),
|
||||
);
|
13
vendor/fguillot/picofeed/lib/PicoFeed/Rules/cad-comic.com.php
vendored
Normal file
13
vendor/fguillot/picofeed/lib/PicoFeed/Rules/cad-comic.com.php
vendored
Normal file
|
@ -0,0 +1,13 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%/cad/.+%' => array(
|
||||
'test_url' => 'http://www.cad-comic.com/cad/20150417',
|
||||
'body' => array(
|
||||
'//*[@id="content"]/img',
|
||||
),
|
||||
'strip' => array(),
|
||||
),
|
||||
),
|
||||
);
|
11
vendor/fguillot/picofeed/lib/PicoFeed/Rules/chaoslife.findchaos.com.php
vendored
Normal file
11
vendor/fguillot/picofeed/lib/PicoFeed/Rules/chaoslife.findchaos.com.php
vendored
Normal file
|
@ -0,0 +1,11 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://chaoslife.findchaos.com/pets-in-the-wild',
|
||||
'body' => array('//div[@id="comic"]'),
|
||||
'strip' => array(),
|
||||
),
|
||||
),
|
||||
);
|
11
vendor/fguillot/picofeed/lib/PicoFeed/Rules/cliquerefresh.com.php
vendored
Normal file
11
vendor/fguillot/picofeed/lib/PicoFeed/Rules/cliquerefresh.com.php
vendored
Normal file
|
@ -0,0 +1,11 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%/comic.*%' => array(
|
||||
'test_url' => 'http://cliquerefresh.com/comic/078-stating-the-obvious/',
|
||||
'body' => array('//div[@class="comicImg"]/img | //div[@class="comicImg"]/a/img'),
|
||||
'strip' => array(),
|
||||
),
|
||||
),
|
||||
);
|
38
vendor/fguillot/picofeed/lib/PicoFeed/Rules/cnet.com.php
vendored
Normal file
38
vendor/fguillot/picofeed/lib/PicoFeed/Rules/cnet.com.php
vendored
Normal file
|
@ -0,0 +1,38 @@
|
|||
<?php

/*
 * Rule file for cnet.com.
 *
 * Returns an array with a 'grabber' section keyed by URL regexes. A dedicated
 * entry for paths starting with /products is listed before the catch-all
 * '%.*%' entry. Each entry carries a sample 'test_url', a list of 'body'
 * XPath expressions and a list of 'strip' XPath expressions.
 */
return array(
    'grabber' => array(
        '%^/products.*%' => array(
            'test_url' => 'http://www.cnet.com/products/fibaro-flood-sensor/#ftag=CADf328eec',
            'body' => array(
                // Fixed three XPath errors in this expression: the unclosed
                // contains( call, "||" (XPath's union operator is a single "|"),
                // and the unquoted promoFigure, which XPath would read as a
                // child-element node-set rather than a string.
                '//li[contains(@class,"slide first")] | //figure[contains(@class,"promoFigure")]',
                '//div[@class="quickInfo"]',
                '//div[@class="col-6 ratings"]',
                '//div[@id="editorReview"]',
            ),
            'strip' => array(
                '//script',
                '//a[@class="clickToEnlarge"]',
                '//div[@section="topSharebar"]',
                '//div[contains(@class,"related")]',
                '//div[contains(@class,"ad-")]',
                '//div[@section="shortcodeGallery"]',
            ),
        ),
        '%.*%' => array(
            'test_url' => 'http://cnet.com.feedsportal.com/c/34938/f/645093/s/4a340866/sc/28/l/0L0Scnet0N0Cnews0Cman0Eclaims0Eonline0Epsychic0Emade0Ehim0Ebuy0E10Emillion0Epowerball0Ewinning0Eticket0C0Tftag0FCAD590Aa51e/story01.htm',
            'body' => array(
                '//p[@itemprop="description"]',
                '//div[@itemprop="articleBody"]',
            ),
            'strip' => array(
                '//script',
                '//a[@class="clickToEnlarge"]',
                '//div[@section="topSharebar"]',
                '//div[contains(@class,"related")]',
                '//div[contains(@class,"ad-")]',
                '//div[@section="shortcodeGallery"]',
            ),
        ),
    ),
);
|
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/consomac.fr.php
vendored
Normal file
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/consomac.fr.php
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://consomac.fr/news-2430-l-iphone-6-toujours-un-secret-bien-garde.html',
|
||||
'body' => array(
|
||||
'//div[contains(@id, "newscontent")]',
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
9
vendor/fguillot/picofeed/lib/PicoFeed/Rules/cowbirdsinlove.com.php
vendored
Normal file
9
vendor/fguillot/picofeed/lib/PicoFeed/Rules/cowbirdsinlove.com.php
vendored
Normal file
|
@ -0,0 +1,9 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'filter' => array(
|
||||
'%.*%' => array(
|
||||
'%title="(.+)" */>%' => '/><br/>$1',
|
||||
),
|
||||
),
|
||||
);
|
19
vendor/fguillot/picofeed/lib/PicoFeed/Rules/csmonitor.com.php
vendored
Normal file
19
vendor/fguillot/picofeed/lib/PicoFeed/Rules/csmonitor.com.php
vendored
Normal file
|
@ -0,0 +1,19 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.csmonitor.com/USA/Politics/2015/0925/John-Boehner-steps-down-Self-sacrificing-but-will-it-lead-to-better-government',
|
||||
'body' => array(
|
||||
'//figure[@id="image-top-1"]',
|
||||
'//div[@id="story-body"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//img[@title="hide caption"]',
|
||||
'//*[contains(@class,"promo_link")]',
|
||||
'//div[@id="story-embed-column"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
20
vendor/fguillot/picofeed/lib/PicoFeed/Rules/dailyjs.com.php
vendored
Normal file
20
vendor/fguillot/picofeed/lib/PicoFeed/Rules/dailyjs.com.php
vendored
Normal file
|
@ -0,0 +1,20 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://dailyjs.com/2014/08/07/p5js/',
|
||||
'body' => array(
|
||||
'//div[@id="post"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//h2[@class="post"]',
|
||||
'//div[@class="meta"]',
|
||||
'//*[contains(@class, "addthis_toolbox")]',
|
||||
'//*[contains(@class, "addthis_default_style")]',
|
||||
'//*[@class="navigation small"]',
|
||||
'//*[@id="related"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
16
vendor/fguillot/picofeed/lib/PicoFeed/Rules/dailyreporter.com.php
vendored
Normal file
16
vendor/fguillot/picofeed/lib/PicoFeed/Rules/dailyreporter.com.php
vendored
Normal file
|
@ -0,0 +1,16 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://dailyreporter.com/2016/01/09/us-supreme-court-case-could-weaken-government-workers-unions/',
|
||||
'body' => array(
|
||||
'//div[contains(@class, "entry-content")]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//div[@class="dmcss_login_form"]',
|
||||
'//*[contains(@class, "sharedaddy")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/dailytech.com.php
vendored
Normal file
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/dailytech.com.php
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.dailytech.com/Apples+First+Fixes+to+iOS+9+Land+w+iOS++901+Release/article37495.htm',
|
||||
'body' => array(
|
||||
'//div[@class="NewsBodyImage"]',
|
||||
'//span[@id="lblSummary"]',
|
||||
'//span[@id="lblBody"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
15
vendor/fguillot/picofeed/lib/PicoFeed/Rules/degroupnews.com.php
vendored
Normal file
15
vendor/fguillot/picofeed/lib/PicoFeed/Rules/degroupnews.com.php
vendored
Normal file
|
@ -0,0 +1,15 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.degroupnews.com/medias/vodsvod/amazon-concurrence-la-chromecast-de-google-avec-fire-tv-stick',
|
||||
'body' => array(
|
||||
'//div[@class="contenu"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//div[contains(@class, "a2a")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
15
vendor/fguillot/picofeed/lib/PicoFeed/Rules/derstandard.at.php
vendored
Normal file
15
vendor/fguillot/picofeed/lib/PicoFeed/Rules/derstandard.at.php
vendored
Normal file
|
@ -0,0 +1,15 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://derstandard.at/2000010267354/The-Witcher-3-Hohe-Hardware-Anforderungen-fuer-PC-Spieler?ref=rss',
|
||||
'body' => array(
|
||||
'//div[@class="copytext"]',
|
||||
'//ul[@id="media-list"]',
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
12
vendor/fguillot/picofeed/lib/PicoFeed/Rules/dilbert.com.php
vendored
Normal file
12
vendor/fguillot/picofeed/lib/PicoFeed/Rules/dilbert.com.php
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'body' => array(
|
||||
'//img[@class="img-responsive img-comic"]',
|
||||
),
|
||||
'test_url' => 'http://dilbert.com/strip/2016-01-28',
|
||||
),
|
||||
),
|
||||
);
|
18
vendor/fguillot/picofeed/lib/PicoFeed/Rules/discovermagazine.com.php
vendored
Normal file
18
vendor/fguillot/picofeed/lib/PicoFeed/Rules/discovermagazine.com.php
vendored
Normal file
|
@ -0,0 +1,18 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://blogs.discovermagazine.com/the-extremo-files/2015/09/11/have-scientists-found-the-worlds-deepest-fish/',
|
||||
'body' => array(
|
||||
'//div[@class="entry"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//h1',
|
||||
'//div[@class="meta"]',
|
||||
'//div[@class="shareIcons"]',
|
||||
'//div[@class="navigation"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/distrowatch.com.php
vendored
Normal file
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/distrowatch.com.php
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://distrowatch.com/?newsid=08355',
|
||||
'body' => array(
|
||||
'//td[@class="NewsText"][1]',
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
16
vendor/fguillot/picofeed/lib/PicoFeed/Rules/dozodomo.com.php
vendored
Normal file
16
vendor/fguillot/picofeed/lib/PicoFeed/Rules/dozodomo.com.php
vendored
Normal file
|
@ -0,0 +1,16 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://dozodomo.com/bento/2014/03/04/lart-des-maki-de-takayo-kiyota/',
|
||||
'body' => array(
|
||||
'//div[@class="joke"]',
|
||||
'//div[@class="story-cover"]',
|
||||
'//div[@class="story-content"]',
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
16
vendor/fguillot/picofeed/lib/PicoFeed/Rules/drawingboardcomic.com.php
vendored
Normal file
16
vendor/fguillot/picofeed/lib/PicoFeed/Rules/drawingboardcomic.com.php
vendored
Normal file
|
@ -0,0 +1,16 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'body' => array('//img[@id="comicimage"]'),
|
||||
'strip' => array(),
|
||||
'test_url' => 'http://drawingboardcomic.com/index.php?comic=208',
|
||||
),
|
||||
),
|
||||
'filter' => array(
|
||||
'%.*%' => array(
|
||||
'%title="(.+)" */>%' => '/><br/>$1',
|
||||
),
|
||||
),
|
||||
);
|
13
vendor/fguillot/picofeed/lib/PicoFeed/Rules/encyclopedie.naheulbeuk.com.php
vendored
Normal file
13
vendor/fguillot/picofeed/lib/PicoFeed/Rules/encyclopedie.naheulbeuk.com.php
vendored
Normal file
|
@ -0,0 +1,13 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://encyclopedie.naheulbeuk.com/article.php3?id_article=352',
|
||||
'body' => array(
|
||||
'//td//h1[@class="titre-texte"]',
|
||||
'//td//div[@class="surtitre"]',
|
||||
'//td//div[@class="texte"]',
|
||||
),
|
||||
)
|
||||
),
|
||||
);
|
9
vendor/fguillot/picofeed/lib/PicoFeed/Rules/endlessorigami.com.php
vendored
Normal file
9
vendor/fguillot/picofeed/lib/PicoFeed/Rules/endlessorigami.com.php
vendored
Normal file
|
@ -0,0 +1,9 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'filter' => array(
|
||||
'%.*%' => array(
|
||||
'%-150x150%' => '',
|
||||
),
|
||||
),
|
||||
);
|
11
vendor/fguillot/picofeed/lib/PicoFeed/Rules/engadget.com.php
vendored
Normal file
11
vendor/fguillot/picofeed/lib/PicoFeed/Rules/engadget.com.php
vendored
Normal file
|
@ -0,0 +1,11 @@
|
|||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.engadget.com/2015/04/20/dark-matter-discovery/?ncid=rss_truncated',
|
||||
'body' => array('//div[@id="page_body"]/div[@class="container@m-"]'),
|
||||
'strip' => array('//aside[@role="banner"]'),
|
||||
),
|
||||
),
|
||||
);
|
Some files were not shown because too many files have changed in this diff Show more
Reference in a new issue