Welcome, guest! Login / Register - Why register?
Psst.. new poll here.
Psst.. new forums here.
Microsoft is blocking us again (TY IP Reputation!) so just use oauth login instead. :)

Paste

Pasted as PHP by dasdas ( 15 years ago )
/**
 * Collects the sub-category pages linked from the "Categories" section of
 * each given page's left-hand navigation.
 *
 * For every entry in $urls the page is loaded with file_get_html(), the
 * "Categories" section of the left navigation is located, and each category
 * link found there is stored through saveCategory(). When the category URL
 * can be opened, a BNodeInfo (parentId = id returned by saveCategory(),
 * url = category URL) is queued for the caller to process next.
 *
 * Pages or nodes that do not have the expected markup are skipped so that
 * one malformed page does not discard the results gathered from the others.
 *
 * @param array $urls  Objects exposing ->url (page to load) and ->parentId
 *                     (parent id handed to saveCategory()).
 * @param int   $depth Current crawl depth; only used to indent and label the
 *                     status message.
 *
 * @return array|false List of BNodeInfo objects to follow, or false when no
 *                     followable category URL was found.
 */
function parseCategories($urls, $depth = 1)
{
    $urls_to_follow = array();

    foreach ($urls as $bnodei) {
        $html = file_get_html($bnodei->url);
        if (!$html) {
            continue; // page could not be loaded/parsed
        }

        $navContainer = $html->find('div[id=leftNavContainer]', 0);
        $html = null;
        unset($html);
        if (!$navContainer) {
            continue; // page has no left navigation
        }

        // Child #5 is <div style="float: left; width: 170px;">, which holds the whole left menu.
        $navChildren = $navContainer->childNodes();
        if (!isset($navChildren[5])) {
            continue;
        }
        $linkNodes = $navChildren[5]->childNodes();
        $navContainer = null;
        $navChildren = null;

        $categories = false; // becomes true once the "Categories" heading has been seen
        foreach ($linkNodes as $menuNode) {
            $nodeClass = isset($menuNode->attr['class']) ? $menuNode->attr['class'] : null;

            if ($nodeClass === 'leftnav_cathead') {
                if ($categories) {
                    break; // another section starts after "Categories": stop processing
                }
                $heading = $menuNode->find('h5', 0);
                if ($heading && $heading->plaintext === 'Categories') {
                    $categories = true; // categories section found
                }
                continue;
            }

            if (!$categories || $nodeClass !== 'leftnav_outsidebox') {
                continue; // outside the categories section, or not a link box
            }

            if (!method_exists($menuNode, 'find')) {
                continue; // skip this node but keep the URLs collected so far
            }

            // The doubled selector matches only the inner element of the site's nested
            // structure, which prevents every category from being saved twice.
            foreach ($menuNode->find('div[class=leftnav_narrowby_links] div[class=leftnav_narrowby_links]') as $divCategory) {
                $link = $divCategory->first_child();
                if (!$link) {
                    continue; // empty container, nothing to save
                }

                // NOTE(review): this pattern strips a trailing "s<digits>". It was presumably
                // meant to be '/\s\([0-9]+\)$/' (a trailing " (123)" counter) with the
                // backslashes lost in transit - confirm against real titles before changing.
                $categoryTitle = preg_replace('/s([0-9]+)$/', '', $link->plaintext);

                // The category is saved even when it has no usable link.
                $categoryId = saveCategory($categoryTitle, $bnodei->parentId);

                if (empty($link->attr['href'])) {
                    continue;
                }
                $href = $link->attr['href'];

                if (IS_LOCAL_COPY == 1) {
                    $categoryURL = dirname(dirname(__FILE__)) . '/src/www.buycostumes.com' . $href;
                } else {
                    $categoryURL = HOST_URL . $href;
                }

                reportStatus(str_repeat(' ', $depth * 5) . "[{$depth}] Recurse into $categoryTitle");

                // Only queue the page if it can actually be opened.
                $fp = fopen($categoryURL, 'r');
                if ($fp) {
                    fclose($fp);
                    $bnode = new BNodeInfo();
                    $bnode->parentId = $categoryId;
                    $bnode->url = $categoryURL;
                    $urls_to_follow[] = $bnode;
                }
            }
        }
    }

    return count($urls_to_follow) > 0 ? $urls_to_follow : false;
}

 

Revise this Paste

Your Name: Code Language: