I want to create a crawler who take the informations of all pages one by one from number 0 to 10 000 000. I don't matter how much time it takes. I just want it works. Here is the error i obtain
Fatal error: Maximum function nesting level of '100' reached, aborting! in D:\wamp\www\crawler\index.php on line 25
The line 25 is
$htmlstr = (string)$this->curlGet($url);
And there is my complete script.
Thank you for your help !
header('Content-Type: text/html; charset=utf-8');
ini_set('max_input_nesting_level','100000');
ini_set('max_execution_time','100000');
class crawler{
private $url;
private $page;
private $bothurl;
private $innerDom = null;
public $prop;
public $entry;
function __construct($entry){
$this->entry = $entry;
$this->bothurl = array('http://www.remax-quebec.com/fr/inscription/Q/'.$entry.'.rmx','http://www.remax-quebec.com/en/inscription/Q/'.$entry.'.rmx');
$this->scan();
}
private function scan(){
$i =0;
foreach($this->bothurl as $url){
$this->url = $url;
$this->lang = ($i==0)?'fr':'en';
$htmlstr = (string)$this->curlGet($url);
$dom = new DOMDocument;
@$dom->loadHTML($htmlstr);
$this->page = $dom;
$this->htmlInfos();
$this->getInfos();
$i++;
}
}
private function htmlInfos(){
$divs = $this->page->getElementsByTagName('div');
foreach($divs as $div){
if($div->hasAttribute('class') && $div->getAttribute('class') == 'bloc specs'){
$innerDom = new DOMDocument();
@$innerDom->loadHTML($this->innerHTML($div));
$this->innerDom = $innerDom;
}
}
if($this->innerDom === null) $this->changeEntry();
}
private function getInfos(){
$sect = 0;
foreach($this->innerDom->getElementsByTagName('div') as $div){
# obtenir la description
$this->getDesc($div->getAttribute('class'),$div);
# obtenir les caractéristiques
$this->getCaract($div->getAttribute('class'),$div);
# obtenir les informations interieur, exterieur et evaluation
if($div->getAttribute('class') == 'section deux-colonnes'){
switch($sect){
case 0: $this->getSpecInfos($div,'interieur'); break;
case 1: $this->getSpecInfos($div,'exterieur'); break;
case 2: $this->getSpecInfos($div,'evaluation'); break;
case 3: $this->getSpecInfos($div,'equipement'); break;
case 4: $this->getSpecInfos($div,'services'); break;
}
$sect++;
}else if($div->getAttribute('class') == 'section'){
# obtenir les détails des pièces
foreach($div->getElementsByTagName('table') as $table){
if($table->getAttribute('class') == 'details-pieces'){
$this->detailPieces($table);
}
}
}
}
}
private function getDesc($class,$obj){
if($class == 'section description'){
$p = $obj->getElementsByTagName('p')->item(0);
$text = (string)$p->nodeValue;
$this->prop[$this->lang]['description'] = $text;
}
}
private function getCaract($class,$obj){
if($class == 'section characteristiques'){
foreach($obj->getElementsByTagName('div') as $div){
if(substr($div->getAttribute('class'),0,4) == "item"){
$text = (string)$div->nodeValue;
$this->prop[$this->lang]['caracteritiques'][substr($div->getAttribute('class'),5)] = $text;
}
}
}
}
private function getSpecInfos($obj,$nomInfo){
foreach($obj->getElementsByTagName('table') as $table){
foreach($table->getElementsByTagName('tr') as $tr){
$name = $tr->getElementsByTagName('td')->item(0);
$value = $tr->getElementsByTagName('td')->item(1);
$name = substr((string)$name->nodeValue,0,-2);
$value = (string)$value->nodeValue;
$this->prop[$this->lang][$nomInfo][$this->noAccents($name)] = $value;
}
}
}
private function detailPieces($obj){
$tbody = $obj->getElementsByTagName('tbody')->item(0);
foreach($tbody->getElementsByTagName('tr') as $tr){
$name = $tr->getElementsByTagName('td')->item(0);
$name = (string)$name->nodeValue;
$level = $tr->getElementsByTagName('td')->item(1);
$level = (string)$level->nodeValue;
$dimensions = $tr->getElementsByTagName('td')->item(2);
$dimensions = (string)$dimensions->nodeValue;
$floor = $tr->getElementsByTagName('td')->item(3);
$floor = (string)$floor->nodeValue;
$desc = $tr->getElementsByTagName('td')->item(4);
$desc = (string)$desc->nodeValue;
$this->prop[$this->lang]['pieces'][$this->noAccents($name)]['etage'] = $level;
$this->prop[$this->lang]['pieces'][$this->noAccents($name)]['dimensions'] = $dimensions;
$this->prop[$this->lang]['pieces'][$this->noAccents($name)]['revetement'] = $floor;
$this->prop[$this->lang]['pieces'][$this->noAccents($name)]['description'] = $desc;
}
}
private function innerHTML($element){
$innerHTML = "";
$children = $element->childNodes;
foreach ($children as $child)
{
$tmp_dom = new DOMDocument();
$tmp_dom->appendChild($tmp_dom->importNode($child, true));
$innerHTML.=trim($tmp_dom->saveHTML());
}
return $innerHTML;
}
private function noAccents($value){
$string= strtr($chaine,"ÀÁÂÃÄÅàáâãäåÒÓÔÕÖØòóôõöøÈÉÊËèéêëÇçÌÍÎÏìíîïÙÚÛÜùúûüÿÑñ","aaaaaaaaaaaaooooooooooooeeeeeeeecciiiiiiiiuuuuuuuuynn");
}
private function changeEntry(){
$this->entry++;
echo $this->entry;
$this->scan();
}
private function curlGet($url){
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_ENCODING, "gzip");
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
$data = curl_exec($curl);
curl_close($curl);
return $data;
}
}
$entry = 8678057;
$crawler = new crawler($entry);
echo '<pre>';
print_r($crawler->prop);
echo '</pre>';
Assuming you're using xdebug, you can set your own limit with
ini_set('xdebug.max_nesting_level', $limit)
changes the file /etc/mysql/my.cnf parameter to something like that max_allowed_packet = 512M
Get sure you've got xdebug installed (use phpinfo()) and then change the file /etc/php5/fpm/php.ini adding or editing the line : xdebug.max_nesting_level=1000
Restart both services sudo service mysql restart sudo service php5-fpm restart
If it doesn't work you can still set those two parameters to false at /etc/php5/fpm/php.ini xdebug.remote_autostart=0 xdebug.remote_enable=0
In my case, it was related to composer. Some vendors were updated in the composer.json file, but I forgot to run the commands composer update nor composer install. The system generated a cascade of errros, which was causing this 'maximum nested level'.
After executing those commands, the problem was fixed
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With