Psst.. new poll here.
Psst.. new forums here.
Microsoft is blocking us again (TY IP Reputation!) so just use oauth login instead. :)
Paste
Pasted as PHP by dasdas ( 15 years ago )
/**
 * Scan the left-hand "Categories" menu of each given page, save every category
 * found there and collect the category pages that should be crawled next.
 *
 * For every category entry the title is passed to saveCategory() together with
 * the parent id of the page it was found on. Entries that carry an href whose
 * target can be opened are returned as new BNodeInfo objects (parentId = id
 * returned by saveCategory(), url = resolved category URL) so the caller can
 * feed them back into this function for the next level.
 *
 * Pages that cannot be loaded, or whose markup does not match the expected
 * layout, are skipped instead of aborting the whole run.
 *
 * @param array $urls  List of objects exposing ->url (page to load) and
 *                     ->parentId (category id the page belongs to).
 * @param int   $depth Current crawl depth; only used to indent/label the
 *                     status messages.
 *
 * @return array|false List of BNodeInfo objects to follow, or false when
 *                     nothing was found.
 */
function parseCategories($urls, $depth = 1)
{
    $urls_to_follow = array();

    foreach ($urls as $bnodei) {
        $html = file_get_html($bnodei->url);
        if (!is_object($html)) {
            continue; // page could not be loaded or parsed
        }

        $links = _parseCategoriesCollectLinks($html);

        // Only plain strings are kept past this point, so the DOM can be released.
        // clear() breaks the parser's circular references; nulling the variable
        // alone does not free the tree.
        if (method_exists($html, 'clear')) {
            $html->clear();
        }
        $html = null;

        foreach ($links as $link) {
            // Strip a trailing " (123)" item count from the title.
            // NOTE(review): the pasted pattern was '/s([0-9]+)$/', presumably with
            // its backslashes stripped by the paste site - confirm against the site markup.
            $categoryTitle = preg_replace('/\s\([0-9]+\)$/', '', $link['title']);

            // save the category
            $categoryId = saveCategory($categoryTitle, $bnodei->parentId);

            if (empty($link['href'])) {
                continue; // nothing to recurse into
            }

            if (IS_LOCAL_COPY == 1) {
                $categoryURL = dirname(dirname(__FILE__)) . '/src/www.buycostumes.com' . $link['href'];
            } else {
                $categoryURL = HOST_URL . $link['href'];
            }

            reportStatus(str_repeat(' ', $depth * 5) . "[{$depth}] Recurse into $categoryTitle");

            // Queue the page only if it can actually be opened.
            $fp = fopen($categoryURL, 'r');
            if ($fp) {
                fclose($fp);
                $bnode = new BNodeInfo();
                $bnode->parentId = $categoryId;
                $bnode->url = $categoryURL;
                $urls_to_follow[] = $bnode;
            }
        }
    }

    if (sizeof($urls_to_follow) > 0) {
        return $urls_to_follow;
    }

    return false;
}

/**
 * Extract the title/href pairs listed under the "Categories" heading of the
 * left navigation of an already parsed page.
 *
 * @param object $html Parsed document exposing find().
 *
 * @return array List of array('title' => string, 'href' => string); href is ''
 *               when the entry has no link. Empty when the expected markup is
 *               not present.
 */
function _parseCategoriesCollectLinks($html)
{
    $links = array();

    $container = $html->find('div[id=leftNavContainer]', 0);
    if (!is_object($container)) {
        return $links;
    }

    // Child #5 is the <div style="float: left; width: 170px;"> that holds the whole left menu.
    $sections = $container->childNodes();
    if (!isset($sections[5]) || !is_object($sections[5])) {
        return $links;
    }

    $inCategories = false;
    foreach ($sections[5]->childNodes() as $menuNode) {
        // Skip (rather than abort on) anything that is not a searchable node.
        if (!is_object($menuNode) || !method_exists($menuNode, 'find')) {
            continue;
        }

        $class = isset($menuNode->attr['class']) ? $menuNode->attr['class'] : '';

        if ($class === 'leftnav_cathead') {
            if ($inCategories) {
                break; // next section heading reached: the category list is over
            }
            $heading = $menuNode->find('h5', 0);
            if (is_object($heading) && $heading->plaintext === 'Categories') {
                $inCategories = true; // categories section found
            }
            continue;
        }

        if (!$inCategories || $class !== 'leftnav_outsidebox') {
            continue; // outside the category section, or not a link box
        }

        // The doubled selector matches only the inner of the nested
        // leftnav_narrowby_links divs, so each category is picked up once.
        $selector = 'div[class=leftnav_narrowby_links] div[class=leftnav_narrowby_links]';
        foreach ($menuNode->find($selector) as $divCategory) {
            $anchor = $divCategory->first_child();
            if (!is_object($anchor)) {
                continue;
            }
            $links[] = array(
                'title' => $anchor->plaintext,
                'href'  => isset($anchor->attr['href']) ? $anchor->attr['href'] : '',
            );
        }
    }

    return $links;
}
Revise this Paste