Dynadot — .com Registration $8.99

Need urgent help on grab rss

Spaceship Spaceship
Watch

tandiono

Tandiono.comEstablished Member
Impact
0
I'm trying to grab RSS feeds from 2rss.com, but there appears to be something wrong with the code. I'd really appreciate it if anyone could help me out.

Here's the PHP code:



PHP:
<?php
// Crawler bootstrap: loads ../conf/config.ini, flattens it into
// "section/key" entries, opens the MySQL connection and initialises the
// global counter of newly seen feeds ($num).

set_time_limit(999999999);

// NOTE(review): this silences every PHP warning and notice for the whole
// run, which makes failures in the code below invisible. Consider enabling
// error reporting while debugging.
error_reporting(0);

require_once('magpierss/rss_fetch.inc');

$cfg = parse_ini_file("../conf/config.ini", true);

$base = dirname(__FILE__);

// parse_ini_file() returns false on failure and count(false) evaluates to 1,
// so the result has to be checked as an array before counting it.
if (!is_array($cfg) || !count($cfg)) { die("Cannot parse config.ini file"); }

// Flatten [section] key=value pairs into "section/key" => value entries.
// Only the key is lower-cased; the section name is used as written.
$cfgf = array();
foreach ($cfg as $cfgk => $cfgv)
{
	// Keys that appear outside any section are scalars; nothing to flatten.
	if (!is_array($cfgv)) { continue; }

	foreach ($cfgv as $cfgfk => $cfgfv)
	{
		$cfgfk = strtolower($cfgfk);
		$cfgf["{$cfgk}/{$cfgfk}"] = $cfgfv;
	}
}

// Keep the sectioned view and add the flattened one alongside it.
$cfg = array_merge($cfg, $cfgf);

//$db_host = "localhost";
//$db_user = "xxx";
//$db_pass = "xxx";
//$db_name = "xxx";

$m_link = mysql_connect ($cfg["db/hostname"], $cfg["db/username"], $cfg["db/password"]);
if (!$m_link) { die("Cannot connect to database: ".mysql_error()); }

if (!mysql_select_db ($cfg["db/database"], $m_link)) { die("Cannot select database: ".mysql_error($m_link)); }

// Number of feed URLs seen for the first time during this run
// (incremented by setlinks(), checked by the crawl loop).
$num=0;

	/**
	 * Turn a captured link fragment into an absolute http:// URL.
	 *
	 * The last two characters of $str are dropped; of what remains, only the
	 * text after the final "http://" occurrence is kept (the whole string
	 * when there is none) and "http://" is prefixed to it.
	 *
	 * @param string $str Raw fragment as captured by the caller.
	 * @return string URL starting with "http://".
	 */
	function getlink ($str)
	{
		$trimmed  = substr($str, 0, strlen($str) - 2);
		$segments = explode('http://', $trimmed);
		$tail     = end($segments);

		return 'http://' . $tail;
	}

	

	/**
	 * Store every feed URL in $links that is not yet in the `feeds` table.
	 *
	 * For each new URL a placeholder row is inserted, the feed is fetched
	 * with fetch_rss(), and the row is then filled in with the channel
	 * title, description, type and link. If the feed cannot be fetched, has
	 * an empty title, or the UPDATE fails, the placeholder row is deleted.
	 *
	 * The global $num is incremented once per URL that was not already
	 * present, whether or not the feed turns out to be usable.
	 *
	 * @param array $links  Feed URLs; these come from scraped pages, so
	 *                      every value is escaped before it reaches SQL.
	 * @param mixed $catid  Category id stored with each new feed.
	 * @return void
	 */
	function setlinks ($links, $catid)
	{
		global $num;

		if (!is_array($links) || sizeof($links) < 1)
		{
			return;
		}

		$safe_catid = mysql_real_escape_string($catid);

		foreach ($links as $link)
		{
			$safe_link = mysql_real_escape_string($link);

			// A failed lookup must not be mistaken for "feed not present".
			$known = mysql_query("select * from feeds WHERE feed_url='".$safe_link."'");
			if (!$known || mysql_num_rows($known) != 0)
			{
				continue;
			}

			$num++;

			// Append the feed after the current highest position.
			$pos_result = mysql_query("select max(position) as posit from feeds");
			$pos = $pos_result ? mysql_fetch_assoc($pos_result) : false;
			$position = ($pos) ? $pos["posit"]+1 : 1;

			$inserted = mysql_query("INSERT INTO feeds VALUES (null, '', '', '".$safe_catid."', '".$safe_link."', '".$position."', '', '', '0', '2', '".time()."')");
			if (!$inserted)
			{
				continue;
			}
			$feedid = mysql_insert_id();

			$saved = false;
			$rss = fetch_rss($link);

			if ($rss && $rss->channel["title"]!='')
			{
				// The type label is the format name followed by whatever
				// is_rss() / is_atom() returned.
				if ($rss->is_rss())
				{
					$type = "RSS ".$rss->is_rss();
				}
				else
				{
					$type = "ATOM ".$rss->is_atom();
				}

				$title       = strip_tags($rss->channel["title"]);
				$description = isset($rss->channel["description"]) ? strip_tags($rss->channel["description"]) : '';
				$channel_url = isset($rss->channel["link"]) ? $rss->channel["link"] : '';

				$saved = mysql_query("UPDATE feeds set `name`='".mysql_real_escape_string($title)."', `description`='".mysql_real_escape_string($description)."', `type`='".mysql_real_escape_string($type)."', `link`='".mysql_real_escape_string($channel_url)."' WHERE id='".$feedid."'");
			}

			// Unusable feed or failed update: remove the placeholder row.
			if (!$saved)
			{
				mysql_query("DELETE FROM feeds WHERE id='".$feedid."'") or die(mysql_error());
			}
		}
	}

	

	/**
	 * Look up a category in the `cats` table, creating it when missing,
	 * and return its id.
	 *
	 * The top-level category $tlcatname is resolved first and inserted with
	 * parent '0' when it does not exist and its name is non-empty. When
	 * $catname is empty the top-level id is returned; otherwise the child
	 * category $catname under that parent is looked up, inserted if absent,
	 * and its id is returned.
	 *
	 * Both names originate from scraped pages and are escaped before being
	 * placed into SQL.
	 *
	 * @param string $catname    Sub-category name, or '' for none.
	 * @param string $tlcatname  Top-level category name.
	 * @return mixed Category id; null when no top-level category could be
	 *               resolved and $catname is empty.
	 */
	function setcat ($catname, $tlcatname)
	{
		// Defined up front so the "nothing found, nothing to insert" path
		// below does not read an undefined variable.
		$toplevelid = null;

		$safe_tlcatname = mysql_real_escape_string($tlcatname);
		$safe_catname   = mysql_real_escape_string($catname);

		$top_result = mysql_query("SELECT * FROM cats WHERE name='".$safe_tlcatname."'");
		$tcat = $top_result ? mysql_fetch_assoc($top_result) : false;

		if ($tcat)
		{
			$toplevelid = $tcat["id"];
		}
		else if ($tlcatname!='')
		{
			mysql_query("INSERT INTO cats VALUES (null, '0', '".$safe_tlcatname."')");
			$toplevelid = mysql_insert_id();
		}

		if ($catname=='')
		{
			return $toplevelid;
		}

		$child_result = mysql_query("SELECT * FROM cats WHERE parentid='".$toplevelid."' AND name='".$safe_catname."'");
		$pcat = $child_result ? mysql_fetch_assoc($child_result) : false;

		if ($pcat)
		{
			return $pcat["id"];
		}

		// $catname is known to be non-empty at this point.
		mysql_query("INSERT INTO cats VALUES (null, '".$toplevelid."', '".$safe_catname."')");

		return mysql_insert_id();
	}

	//$catn=1;

	//$page=2;

	// Crawl loop: for each of the 53 category numbers, fetch every listing
	// page, resolve the category shown in the breadcrumb, store the feed
	// links found on the page, and read the pager to learn how many pages
	// the category has.
	//
	// NOTE(review): the URL and both patterns below assume the site's
	// index.php?cat2=...&p=... / index.php?rss=... link scheme. If the pages
	// use different links, nothing matches and no feeds are stored - verify
	// against the live markup.
	for ($catn=1;$catn<=53;$catn++)
	{
		// Stop once the cap on newly seen feeds is reached. ">=" so the cap
		// cannot be stepped over within a single category.
		if ($num>=100000) exit();

		// Reset per category so the previous category's page count is not
		// carried over; the pager of each fetched page can raise it.
		$max_page = 1;

		for ($page=1; $page<=$max_page; $page++)
		{
			$url = "http://2rss.com/index.php?cat2=".$catn."&p=".$page;

			$handle = file_get_contents ($url);
			if ($handle === FALSE)
			{
				continue; // page could not be fetched; try the next one
			}

			$isempty = (strpos($handle,"List empty")===FALSE) ? "noempty" : "empty";

			// Keep only the part between the breadcrumb and the first
			// JavaScript block.
			$data_up = stristr($handle, "Top</a> ::");
			if ($data_up === FALSE)
			{
				continue; // breadcrumb marker missing; nothing to parse
			}
			$data = substr($data_up, 0, strpos($data_up, "<script language=\"JavaScript\">"));

			$cats1 = explode("<br><br>", $data);

			############### CATEGORIES ###################
			// The breadcrumb is "::"-separated: the last element is the
			// category (followed by a "[...]" suffix), the one before it is
			// the top-level category.
			$cats2 = explode("::", strip_tags($cats1[0]));
			$ncats = sizeof($cats2);

			$tlcatname = ($ncats >= 2) ? $cats2[$ncats-2] : '';

			$tcn = explode ("[", $cats2[$ncats-1]);

			// eregi_replace() no longer exists in PHP 7+; for a literal
			// space str_replace() gives the same result.
			$catname = str_replace(" ", "", $tcn[0]);

			$catid = setcat($catname, $tlcatname);
			##############################################

			$data = isset($cats1[1]) ? $cats1[1] : '';

			preg_match_all("(\<li style=\'margin-bottom: 5px;\'\>\<a href=\'index.php\?rss=([0-9]+)\'\>http:\/\/(.*[!-]))", $data, $out);

			$links=array_map("getlink", $out[2]);

			//print "Category: [".$catn."] Page: [".$page."] Status: [".$isempty."] Links [".sizeof($links)."]<br>";

			setlinks ($links, $catid);

			// Set paging: the text of the last numbered pager link is taken
			// as the page count. It is only ever raised, so a pager that
			// omits the current page cannot shrink the range.
			$pagg = stristr($data, "<strong>Page</strong>:");

			preg_match_all("(<a href=\'index.php\?cat2=([0-9]+)&p=([0-9]+)\'>([0-9]+)<\/a>)", $pagg, $pages);

			$paging = $pages[3];

			$last_page = sizeof($paging) ? (int)$paging[sizeof($paging)-1] : 0;

			if ($last_page > $max_page) $max_page = $last_page;
		}
	}

?>
 
0
•••
The views expressed on this page by users and staff are their own, not those of NamePros.
AfternicAfternic
Bump..

Anyone, please kindly help me. :)
 
0
•••
I think this is wrong
Code:
$temp = preg_match_all("(\<li style=\'margin-bottom: 5px;\'\>\<a href=\'index.php\?rss=([0-9]+)\'\>http:\/\/(.*[!-]))", $data, $out);
The content which is returned doesn't contain any index.php links. They were replaced by rs_xxx.html files, and you should update the regex accordingly
 
0
•••
flussie said:
I think this is wrong
Code:
$temp = preg_match_all("(\<li style=\'margin-bottom: 5px;\'\>\<a href=\'index.php\?rss=([0-9]+)\'\>http:\/\/(.*[!-]))", $data, $out);
The content which is returned doesn't contain any index.php links. They were replaced by rs_xxx.html files, and you should update the regex accordingly


Do I have to change the index.php to rs_xxx.html as you mentioned?

Thanks :)

-------------

Is the following code all right?

Code:
$temp = preg_match_all("(\<li style=\'margin-bottom: 5px;\'\>\<a href=\'rs_xxx.html\?rss=([0-9]+)\'\>http:\/\/(.*[!-]))", $data, $out);
 
0
•••
Try changing

$url = "http://2rss.com/index.php?cat2=".$catn."&p=".$page;
to
$url = "http://2rss.com/cat2_".$catn."_p".$page.".html";

and

$temp = preg_match_all("(\<li style=\'margin-bottom: 5px;\'\>\<a href=\'index.php\?rss=([0-9]+)\'\>http:\/\/(.*[!-]))", $data, $out);
to
$temp = preg_match_all("(\<li style=\'margin-bottom: 5px;\'\>\<a href=\'rss_([0-9]+)\.html\'\>http:\/\/(.*[!-]))", $data, $out);

and

$paging = preg_match_all("(<a href=\'index.php\?cat2=([0-9]+)&p=([0-9]+)\'>([0-9]+)<\/a>)", $pagg, $pages);
to
$paging = preg_match_all("(<a href=\'cat2_([0-9]+)_p([0-9]+)\'>([0-9]+)<\/a>)", $pagg, $pages);
 
Last edited:
0
•••
Unstoppable Domains
Domain Recover
DomainEasy — Live Options
  • The sidebar remains visible by scrolling at a speed relative to the page's height.
Back