dom = XmlParser::getHtmlDocument(''.$html);
$this->xpath = new DOMXPath($this->dom);
}
/**
 * Extract the relevant content by trying each strategy in turn.
 *
 * The candidate-attribute lookup runs first. When its result is shorter
 * than 200 bytes, it is replaced by the result of the article lookup;
 * when the current result is shorter than 50 bytes, it is replaced by the
 * result of the body lookup. The final value is passed through
 * stripGarbage() before being returned.
 *
 * @return string
 */
public function execute()
{
    $extracted = $this->findContentWithCandidates();

    // strlen() counts bytes, so both thresholds apply to the serialized markup size.
    $extracted = strlen($extracted) >= 200 ? $extracted : $this->findContentWithArticle();
    $extracted = strlen($extracted) >= 50 ? $extracted : $this->findContentWithBody();

    return $this->stripGarbage($extracted);
}
/**
 * Look for content using the list of candidate attribute values.
 *
 * Candidates are tried in the order they appear in $this->candidatesAttributes.
 * An element matches when its class attribute contains the candidate as a
 * substring or its id equals the candidate, unless its class attribute
 * contains "nav" or "page". The first element of the first successful query
 * is serialized with saveXML() and returned.
 *
 * @return string Serialized markup of the first match, or an empty string when no candidate matches
 */
public function findContentWithCandidates()
{
    $caller = get_called_class();

    // The exclusion clause does not depend on the candidate being tried.
    $excluded = 'contains(@class, "nav") or contains(@class, "page")';

    foreach ($this->candidatesAttributes as $attribute) {
        Logger::setMessage($caller.': Try this candidate: "'.$attribute.'"');

        $wanted = 'contains(@class, "'.$attribute.'") or @id="'.$attribute.'"';
        $matches = $this->xpath->query('//*[('.$wanted.') and not ('.$excluded.')]');

        // query() yields false on an invalid expression; an empty list means no match.
        if ($matches === false || $matches->length < 1) {
            continue;
        }

        Logger::setMessage($caller.': Find candidate "'.$attribute.'"');

        return $this->dom->saveXML($matches->item(0));
    }

    return '';
}
/**
 * Look for content inside the first "article" element of the document.
 *
 * @return string Serialized markup of the first "article" element, or an empty string when there is none
 */
public function findContentWithArticle()
{
    $articles = $this->xpath->query('//article');

    // Nothing usable: the query failed or the document has no "article" element.
    if ($articles === false || $articles->length < 1) {
        return '';
    }

    Logger::setMessage(get_called_class().': Find tag');

    return $this->dom->saveXML($articles->item(0));
}
/**
* Find