<?php
// CLI debugging helpers, kept for reference:
//   echo "Received {$argc} arguments"; print_r($argv);
//   fwrite(STDOUT, 'Enter your blog name: '); echo 'You entered: ' . fgets(STDIN);
// Page encoding (only meaningful when served over HTTP):
//   header("Content-Type:text/html;charset=utf-8");

// Report every error level.
@error_reporting(E_ALL);
// Time zone used by date().
date_default_timezone_set('PRC');
// Keep running after the client disconnects (disabled):
//   ignore_user_abort();
// Remove the execution time limit.
set_time_limit(0);
// Memory ceiling for the run.
ini_set('memory_limit', '200M');

$new_spider = new Spider_Amazon();

// Data fetching mode:
//   1 - crawl search-result pages, then export;
//   2 - crawl an explicit list of product links, then export.
$model = 1;
switch (true) {
    case $model === 1:
        $pages = array(
            array('pageTxt' => '2', 'getUrl' => '/s?k=historical+hoodies&gclid=Cj0KCQjwi7yCBhDJARIsAMWFScOghr04y7CRMYUnaATzy2KDgN38n7ZdWz-mesOVEJ0thQPllGs-UFcaAoNJEALw_wcB&hvadid=473310407031&hvdev=c&hvlocphy=1013962&hvnetw=g&hvqmt=e&hvrand=13573984021668640470&hvtargid=kwd-551795145806&hydadcr=8376_9905022&tag=googhydr-20&ref=pd_sl_4cg67q5p8b_e',),
        );
        $new_spider->check_pages($pages);
        $new_spider->start_pages($pages);
        $new_spider->create_pages($pages);
        break;

    case $model === 2:
        $pages = array(
            array('page'=>'page','pageTxt'=>'','getUrl'=>''),
        );
        $products = array(
            array('product_txt' => '102', 'product_link' => '/President-Historical-Sweatshirt-Pullover-XX-Large/dp/B08KJBJW9R/ref=sr_1_3?dchild=1&gclid=Cj0KCQjwi7yCBhDJARIsAMWFScOghr04y7CRMYUnaATzy2KDgN38n7ZdWz-mesOVEJ0thQPllGs-UFcaAoNJEALw_wcB&hvadid=473310407031&hvdev=c&hvlocphy=1013962&hvnetw=g&hvqmt=e&hvrand=13573984021668640470&hvtargid=kwd-551795145806&hydadcr=8376_9905022&keywords=historical+hoodies&qid=1617075084&sr=8-3',),
            array('product_txt' => '103', 'product_link' => '/Historical-Alexander-Hamilton-Washington-Sweatshirt/dp/B08LW1VGWZ/ref=sr_1_1_sspa?dchild=1&gclid=Cj0KCQjwi7yCBhDJARIsAMWFScOghr04y7CRMYUnaATzy2KDgN38n7ZdWz-mesOVEJ0thQPllGs-UFcaAoNJEALw_wcB&hvadid=473310407031&hvdev=c&hvlocphy=1013962&hvnetw=g&hvqmt=e&hvrand=13573984021668640470&hvtargid=kwd-551795145806&hydadcr=8376_9905022&keywords=historical+hoodies&qid=1617075084&sr=8-1-spons&psc=1&smid=A10YLYPK7OYW04&spLa=ZW5jcnlwdGVkUXVhbGlmaWVyPUEzUDZDSjlBTkhQMlJLJmVuY3J5cHRlZElkPUEwNDM2ODcyMjhDNzJXMFkxUzNGNCZlbmNyeXB0ZWRBZElkPUEwMDc0OTIwMjNRWTBRUE0xMjlENiZ3aWRnZXROYW1lPXNwX2F0ZiZhY3Rpb249Y2xpY2tSZWRpcmVjdCZkb05vdExvZ0NsaWNrPXRydWU=',)
        );
        $new_spider->check_pages($pages);
        $new_spider->start_products($products);
        $new_spider->create_pages($pages);
        break;
}

// Drop the crawler before terminating the script.
$new_spider = null;
unset($new_spider);
exit("\nEnd Of Execution\n");
//-----------------------------------------------------------------------------------------------------
class Spider_Amazon
{
// Scheme and host prepended to every relative URL requested by this class.
const DEFAULT_DOMAIN = 'https://www.amazon.com';
// Passed to get_html() as its $is_gzip argument.
private static $is_gzip = true;
// File name of a product's main cached page; set to 'default.txt' in the constructor.
private static $main_cache_txt;
// Pause handed to sleep() after each page; set in the constructor.
private static $page_sleep;
// Pause handed to sleep() after each product; set in the constructor.
private static $product_sleep;
// Set in the constructor; not read anywhere in the visible part of the class.
private static $color_sleep;
// Root directory for cached pages, JSON files and exports; built in the constructor.
private static $page_path;
// When true, download_image() saves image files; when false it only records their URLs.
private static $is_download;
// Key of the page currently being processed (e.g. 'page-2' or 'page').
public $page;
// Key of the product currently being processed ('product-<index>').
public $product;
// Cache index built by check_pages(): [page][product][color_id] => true|null.
public $page_has;
// Id of the color entry that has no URL and whose name equals the selected color;
// assigned in get_product_center_elem().
public $main_id;
/**
 * Initialise the crawl settings and make sure the cache root directory exists.
 *
 * The cache root is "<directory of this file>/<DEFAULT_DOMAIN without scheme>/".
 */
public function __construct()
{
    $root_dir = rtrim(str_replace('\\', '/', __DIR__), '/') . '/';
    self::$main_cache_txt = 'default.txt';
    // Pauses (in seconds) applied after each page / product / color.
    self::$page_sleep     = '.5';
    self::$product_sleep  = '.3';
    self::$color_sleep    = '.1';
    self::$is_download    = false;
    self::$page_path      = $root_dir . str_replace(array('http://', 'https://'), '', self::DEFAULT_DOMAIN) . '/';
    // The mode must be an octal literal: decimal 755 is octal 01363, which
    // yields unintended permission bits.
    if (!is_dir(self::$page_path)) {
        mkdir(self::$page_path, 0755, true);
    }
}
// Destructor placeholder: currently performs no work.
public function __destruct()
{
// TODO: Implement __destruct() method.
}
/**
 * Crawl search-result pages and every product listed on them.
 *
 * For each entry the listing HTML is fetched (or read from cache), the
 * product cards are extracted, each product page is fetched and parsed, and
 * its color variants are processed through download_image(). The collected
 * record is written to "<page>.json" unless check_pages() already found
 * cached data for that page.
 *
 * @param array $pages list of arrays with keys 'pageTxt' (page label) and
 *                     'getUrl' (path and query appended to DEFAULT_DOMAIN)
 * @return array page records, only filled while the local $is_debug flag is
 *               true; otherwise an empty array
 */
public function start_pages($pages)
{
    $is_debug  = false;
    $page_data = array();
    $this->log('page starting...');
    foreach ($pages as $items)
    {
        $this->page = 'page-'.$items['pageTxt'];
        $getUrl     = self::DEFAULT_DOMAIN.$items['getUrl'];
        $pageJson   = self::$page_path.$this->page.'.json';
        $pageTxt    = self::$page_path.$this->page.'.txt';
        $page_dir   = self::$page_path.$this->page.'/';
        // Octal mode; the decimal literal 755 produced unintended permission bits.
        if (!is_dir($page_dir)) mkdir($page_dir, 0755, true);
        $this->log($this->page, 2);

        $is_cache = $this->get_html($getUrl, $pageTxt, self::$is_gzip);
        $items['page']      = $this->page;
        $items['page_type'] = isset($is_cache['type']) ? $is_cache['type'] : null;
        $items['page_txt']  = $pageTxt;
        $items['page_url']  = $getUrl;
        $items['dataList']  = array();
        if ($is_cache['code'] != 0) {
            $this->log('Page Get Html Cache Fail'.$is_cache['code'], 2);
            continue;
        }
        $page_list = $this->get_page_list($pageTxt);
        if ($page_list['code'] != 0) {
            $this->log('Page Get List Match Fail'.$page_list['code'], 2);
            continue;
        }

        foreach ($page_list['data'] as $k => $div)
        {
            $order_id = $k + 1;
            $data = $this->get_page_div_elem($div, array('product_link','product_asin','product_index','product_name','product_main'));
            if (empty($data) || !isset($data['product_link']) || !isset($data['product_index'])) {
                $this->log('Product Get Not Match', 3);
                continue;
            }
            $index = 'product-'.trim($data['product_index']);
            $this->log($index, 3);
            $this->product         = $index;
            $data['product']       = $this->product;
            $data['product_order'] = $order_id;
            $data['product_error'] = '';
            $data['product_dir']   = $page_dir.$index.'/';
            $data['product_txt']   = $data['product_dir'].self::$main_cache_txt;
            if (!is_dir($data['product_dir'])) mkdir($data['product_dir'], 0755, true);

            $is_cache = $this->get_html($data['product_link'], $data['product_txt'], self::$is_gzip);
            $data['product_type'] = isset($is_cache['type']) ? $is_cache['type'] : null;
            if ($is_cache['code'] != 0) {
                $this->log('Product Get Not Cache'.$is_cache['code'], 3);
                continue;
            }
            $info_data = $this->get_product_center_elem($data['product_txt'], array('color'=>1,'colors'=>1,'name'=>1,'price'=>1,'desc'=>1,'size'=>1,));
            if ($info_data['status'] != 0 || empty($info_data['data'])) {
                $this->log('Product GET Not Match InfoData'.$info_data['status'], 3);
                continue;
            }
            // The parser may return without a 'colors' entry; treat that as "no variants"
            // instead of calling count() on a missing value.
            $colors = (isset($info_data['data']['colors']) && is_array($info_data['data']['colors']))
                ? $info_data['data']['colors']
                : array();

            $data['color_num']     = count($colors);
            $data['color_main']    = $this->main_id;
            $data['product_name']  = $info_data['data']['name'];
            $data['product_color'] = $info_data['data']['color'];
            $data['product_price'] = $info_data['data']['price'];
            $data['product_desc']  = $info_data['data']['desc'];
            $data['product_size']  = $info_data['data']['size'];

            // Only colors marked true by check_pages() are really cached; entries
            // recorded as null (failed checks) must not count towards "all cached".
            $all_cached = isset($this->page_has[$this->page][$this->product])
                && count(array_filter((array) $this->page_has[$this->page][$this->product])) == count($colors);
            if ($all_cached) {
                $data['product_colors'] = array();
                $this->log('Already Has All Cache', 7);
            } else {
                // Fetch and record the remaining color variants.
                $data['product_colors'] = $this->download_image($colors, $data['product_dir']);
            }
            $items['dataList'][] = $data;

            // sleep() only accepts whole seconds, so a fractional delay such as '.3'
            // was truncated to 0; usleep() honours it.
            usleep((int) round((float) self::$product_sleep * 1000000));
            // NOTE(review): stops after the product at index 3; this looks like a
            // debugging limit - confirm before relying on full-page crawls.
            if ($k > 2) break;
        }

        if ($is_debug) $page_data[] = $items;
        // Write the JSON only when check_pages() found no cached data for this page.
        if (empty($this->page_has[$this->page])) {
            if (@file_put_contents($pageJson, json_encode($items)) === false) {
                $this->log('Page Json Write Fail', 2);
            }
        }
        usleep((int) round((float) self::$page_sleep * 1000000));
    }
    return $page_data;
}
/**
 * Crawl an explicit list of product pages under the fixed page key 'page'.
 *
 * Each product page is fetched (or read from cache) and parsed, and its color
 * variants are processed through download_image(). The collected record is
 * written to "page.json" unless check_pages() already found cached data for
 * the page.
 *
 * @param array $products list of arrays with keys 'product_txt' (label used
 *                        in the product key) and 'product_link' (path and
 *                        query appended to DEFAULT_DOMAIN)
 * @return array the page record, including the 'dataList' of products
 */
public function start_products($products)
{
    $this->log('products starting...');
    $page_data  = array();
    $this->page = 'page';
    if (isset($this->page_has[$this->page])) {
        $this->log('Page Has : '.$this->page, 2);
    }
    $this->log($this->page, 2);
    $pageJson = self::$page_path.$this->page.'.json';
    if (!is_dir(self::$page_path.$this->page)) mkdir(self::$page_path.$this->page);

    $page_data['page']      = 'page';
    $page_data['page_type'] = 'batch_products';
    $page_data['page_txt']  = self::$page_path.$page_data['page'].'.txt';
    $page_data['page_url']  = self::DEFAULT_DOMAIN;
    $page_data['dataList']  = array();

    foreach ($products as $k => $div)
    {
        $data = array();
        $this->product = 'product-'.$div['product_txt'];
        if (isset($this->page_has[$this->page][$this->product])) {
            $this->log('Page Product Has : '.$this->page.' '.$this->product, 2);
        }
        $this->log($this->product, 3);
        $data['product_link']  = self::DEFAULT_DOMAIN.$div['product_link'];
        $data['product']       = $this->product;
        $data['product_order'] = $k + 1;
        $data['product_error'] = null;
        $data['product_index'] = $data['product_order'];
        $data['product_main']  = '';
        $data['product_dir']   = self::$page_path.$this->page.'/'.$this->product.'/';
        $data['product_txt']   = $data['product_dir'].self::$main_cache_txt;
        // Octal mode; the decimal literal 777 produced unintended permission bits.
        if (!is_dir($data['product_dir'])) {
            mkdir($data['product_dir'], 0777, true);
        }

        $is_cache = $this->get_html($data['product_link'], $data['product_txt'], self::$is_gzip);
        $data['product_type'] = isset($is_cache['type']) ? $is_cache['type'] : null;
        if ($is_cache['code'] != 0) {
            $this->log('Product Get Not Cache', 3);
            continue;
        }
        // Parse the product's own page: product details plus the list of color variants.
        $pro_info = $this->get_product_center_elem($data['product_txt'], array('color'=>1,'colors'=>1,'name'=>1,'price'=>1,'desc'=>1,'size'=>1,));
        if ($pro_info['status'] != 0 || empty($pro_info['data'])) {
            $this->log('Product Get Not Data', 3);
            continue;
        }
        // The parser may return without a 'colors' entry; treat that as "no variants".
        $colors = (isset($pro_info['data']['colors']) && is_array($pro_info['data']['colors']))
            ? $pro_info['data']['colors']
            : array();

        $data['product_name']  = $pro_info['data']['name'];
        $data['product_color'] = $pro_info['data']['color'];
        $data['product_price'] = $pro_info['data']['price'];
        $data['product_desc']  = $pro_info['data']['desc'];
        $data['product_size']  = $pro_info['data']['size'];
        $data['color_num']     = count($colors);

        // Only colors marked true by check_pages() are really cached; null entries
        // (failed checks) must not count towards "all cached".
        $all_cached = isset($this->page_has[$this->page][$this->product])
            && count(array_filter((array) $this->page_has[$this->page][$this->product])) == count($colors);
        if ($all_cached) {
            $data['product_colors'] = array();
            $this->log('Already Has All Cache', 7);
        } else {
            // Fetch and record the remaining color variants.
            $data['product_colors'] = $this->download_image($colors, $data['product_dir']);
        }
        $data['color_main'] = $this->main_id;
        $page_data['dataList'][] = $data;

        // sleep() only accepts whole seconds, so '.3' was truncated to 0; usleep() honours it.
        usleep((int) round((float) self::$product_sleep * 1000000));
    }

    // Write the JSON only when check_pages() found no cached data for this page.
    if (empty($this->page_has[$this->page])) {
        if (@file_put_contents($pageJson, json_encode($page_data)) === false) {
            $this->log('Page Json Write Fail', 2);
        }
    }
    return $page_data;
}
/**
 * Print one log line to standard output, indented to show nesting depth.
 *
 * @param string $log message text
 * @param int    $e   how many times the indent unit is repeated
 * @param string $r   indent unit
 */
private function log($log, $e = 1, $r = ' ')
{
    $indent = str_repeat($r, $e);
    echo $indent, $log, "\n";
}
/**
 * Append one record to "error_log.txt" in the cache root.
 *
 * The log file is deleted first once it has grown beyond 3 MiB.
 *
 * @param string $func   name of the failing routine
 * @param string $reason why it failed
 * @param string $handle what was done about it
 * @param mixed  $res    extra context, stored JSON-encoded
 */
private function error_log($func,$reason,$handle,$res)
{
    $log_file = self::$page_path.'error_log.txt';
    // Terminate every record with a newline; without it consecutive records
    // were glued together on one line.
    $err = date('Y-m-d H:i:s')."\n\t".$func."\n\t".$reason."\n\t".$handle."\n\t".json_encode($res)."\n";
    if (is_file($log_file) && filesize($log_file) > 1024*1024*3) {
        @unlink($log_file);
    }
    file_put_contents($log_file, $err, FILE_APPEND);
}
/**
 * Build the cache index ($this->page_has) from previously written page JSON files.
 *
 * For every page whose "<page>.json" exists and holds a non-empty dataList,
 * each product's colors are recorded as [page][product][color_id] => true
 * when the color entry looks complete, or null when it does not. JSON files
 * without usable data are deleted.
 *
 * @param array $pages list of arrays carrying either 'page' (explicit page
 *                     key) or 'pageTxt' (label, turned into 'page-<label>')
 */
public function check_pages($pages){
    $page_has = array();
    foreach ($pages as $items)
    {
        $page     = isset($items['page']) ? $items['page'] : 'page-'.$items['pageTxt'];
        $pageJson = self::$page_path.$page.'.json';
        if (!is_file($pageJson)) {
            $this->log('Check Cache :'.$page.' No File', 2);
            continue;
        }
        $page_obj = json_decode(file_get_contents($pageJson));
        // Cast before counting so a dataList decoded as an object cannot break count().
        if (!is_object($page_obj) || !isset($page_obj->dataList) || count((array) $page_obj->dataList) <= 0) {
            $res = @unlink($pageJson);
            if (!$res) $this->log('Check Cache : '.$page.' Data Fail', 2);
            continue;
        }
        $page_has[$page] = null;
        foreach ($page_obj->dataList as $obj)
        {
            if (!isset($obj->product) || !isset($obj->product_colors)) {
                $this->log('Check Cache : '.$page.' NotSet Product', 2);
                continue;
            }
            $page_has[$page][$obj->product] = null;
            if (!is_array($obj->product_colors) && !is_object($obj->product_colors)) {
                continue;
            }
            foreach ($obj->product_colors as $key => $ob)
            {
                // product_colors is keyed by color id when written by this class, so the
                // key is a safe fallback when the entry itself lacks 'color_id'. The
                // original dereferenced the missing property in exactly that case.
                $color_id = isset($ob->color_id) ? $ob->color_id : $key;

                if (!isset($ob->color_name) || !isset($ob->color_id) || !empty($ob->color_error) || !isset($ob->image_num))
                {
                    $this->log('Check Cache : '.$page.' '.$obj->product.' '.$color_id.' NotData', 2);
                    $page_has[$page][$obj->product][$color_id] = null;
                    continue;
                }
                if (!isset($ob->image_normal) && !isset($ob->image_large) && !isset($ob->image_thumb) && !isset($ob->image_small))
                {
                    $this->log('Check Cache : '.$page.' '.$obj->product.' '.$color_id.' NotSet ImageFiles', 2);
                    $page_has[$page][$obj->product][$color_id] = null;
                    continue;
                }
                $page_has[$page][$obj->product][$color_id] = true;
            }
        }
    }
    $this->page_has = $page_has;
}
/**
 * Export each page's JSON record to a tab-separated ".xls" file.
 *
 * One header line is written, followed by one line per color variant that
 * carries at least one image set. Page-level and product-level columns are
 * repeated on every line of the product.
 *
 * @param array $pages list of arrays carrying either 'page' (explicit page
 *                     key) or 'pageTxt' (label, turned into 'page-<label>')
 */
public function create_pages($pages){
    // Column order of the export; also the set of JSON fields that are copied.
    $columns = array(
        'page', 'product', 'product_name', 'product_price', 'product_size', 'product_desc',
        'color_price', 'color_size', 'color_desc', 'color_main', 'color_num', 'color_id',
        'color_name', 'color_cache', 'image_num',
        'image_large', 'image_normal', 'image_thumb', 'image_small',
        'page_url', 'page_type', 'page_txt',
        'product_asin', 'product_index', 'product_link', 'product_order', 'product_error',
        'product_type', 'product_dir', 'product_txt',
        'color_asin', 'color_url', 'color_dir', 'color_txt', 'color_error',
        'image_normal_status', 'image_normal_file', 'image_normal_size',
        'image_thumb_status', 'image_thumb_file', 'image_thumb_size',
        'image_large_status', 'image_large_file', 'image_large_size',
        'image_small_status', 'image_small_file', 'image_small_size',
    );
    $product_lists = array('product_desc', 'product_size');
    $color_lists   = array('color_desc', 'color_size');
    $image_sets    = array('image_normal', 'image_large', 'image_thumb', 'image_small');
    // A tab or line break inside a cell would shift or split the row.
    $to_line = function ($row) {
        $cells = array();
        foreach ($row as $cell) {
            $cells[] = str_replace(array("\t", "\r", "\n"), ' ', (string) $cell);
        }
        return implode("\t", $cells)."\n";
    };

    foreach ($pages as $items)
    {
        $page            = isset($items['page']) ? $items['page'] : 'page-'.$items['pageTxt'];
        $pageJson        = self::$page_path.$page.'.json';
        $page_colors_xls = self::$page_path.$page.'_curl_amazon_class_colors.xls';
        if (!is_file($pageJson)) {
            $this->log('Create Excel : '.$page.' No JsonDataFile', 2);
            continue;
        }
        if (is_file($page_colors_xls)) {
            // Octal mode; the decimal literal 755 produced unintended permission bits.
            @chmod($page_colors_xls, 0755);
            // Only complain when the stale export really could not be removed.
            if (!@unlink($page_colors_xls)) {
                $this->log('Create Excel : '.$page.' Xls Already Exists Please Delete manually', 2);
            }
        }

        $page_obj = json_decode(file_get_contents($pageJson));
        if (!is_object($page_obj) || !isset($page_obj->dataList) || count((array) $page_obj->dataList) <= 0) {
            $this->log('Create Excel : '.$page.' Data Fail', 2);
            continue;
        }

        $page_row = array_fill_keys($columns, '');
        $page_row['page']      = isset($page_obj->page) ? $page_obj->page : $page;
        $page_row['page_type'] = isset($page_obj->page_type) ? $page_obj->page_type : '';
        $page_row['page_txt']  = isset($page_obj->page_txt) ? $page_obj->page_txt : '';
        $page_row['page_url']  = isset($page_obj->page_url) ? $page_obj->page_url : '';

        $colors_xls = implode("\t", $columns)."\n";
        foreach ($page_obj->dataList as $obj)
        {
            if (!isset($obj->product) || !isset($obj->product_colors)) {
                $this->log('Create Excel : '.$page.' NotSet Product', 2);
                continue;
            }
            // Start every product from the page-level columns only, so values of the
            // previous product cannot leak into this one.
            $product_row = $page_row;
            foreach ($obj as $produ_i => $produ_v)
            {
                if (!array_key_exists($produ_i, $product_row)) continue;
                if (is_string($produ_v) || is_numeric($produ_v)) {
                    $product_row[$produ_i] = $produ_v;
                } elseif (in_array($produ_i, $product_lists, true) && is_array($produ_v)) {
                    $product_row[$produ_i] = implode(',', $produ_v);
                }
            }

            foreach ($obj->product_colors as $ob)
            {
                if (!isset($ob->image_normal) && !isset($ob->image_large) && !isset($ob->image_thumb) && !isset($ob->image_small)) {
                    $this->log('Create Excel : '.$page.' '.$obj->product.' NotSet ImageFiles', 2);
                    continue;
                }
                // Each color row starts from the product row. This keeps the product-level
                // 'color_main' and 'color_num' values, which the old per-color reset wiped
                // because their names contain neither 'page' nor 'product'.
                $row = $product_row;
                foreach ($ob as $color_i => $color_v)
                {
                    if (!array_key_exists($color_i, $row)) continue;
                    if (is_string($color_v) || is_numeric($color_v)) {
                        $row[$color_i] = $this->decode_str($color_v);
                    } elseif (in_array($color_i, $color_lists, true) && is_array($color_v)) {
                        $row[$color_i] = implode(',', $color_v);
                    } elseif (in_array($color_i, $image_sets, true) && is_array($color_v)) {
                        $urls = $files = $sizes = $states = array();
                        foreach ($color_v as $images) {
                            $urls[]   = $images->image_url;
                            $files[]  = $images->image_file;
                            $states[] = $images->image_status;
                            $sizes[]  = $images->image_size;
                        }
                        // An empty image list leaves the four columns blank instead of
                        // calling implode() on null.
                        if (!$urls) continue;
                        $row[$color_i]           = implode(',', $urls);
                        $row[$color_i.'_file']   = implode(',', $files);
                        $row[$color_i.'_size']   = implode(',', $sizes);
                        $row[$color_i.'_status'] = implode(',', $states);
                    }
                }
                $colors_xls .= $to_line($row);
            }
        }
        if (@file_put_contents($page_colors_xls, $colors_xls) === false) {
            $this->log('Create Excel : '.$page.' Write Fail', 2);
        }
        $colors_xls = null;
    }
}
/**
 * Normalise a value for the export: '-' becomes '_', and in any string that
 * is not detected as pure ASCII every byte in the range 0x7f-0xff becomes '_'.
 *
 * @param string|int|float $decode_str value to normalise
 * @return string normalised text
 */
public function decode_str($decode_str){
    $decode_str = str_replace('-', '_', $decode_str);
    // The original condition "$d != 'ASCII' || ($d == 'UTF-8' && ...)" reduces to
    // "$d != 'ASCII'": the second operand can only be reached when $d is 'ASCII'.
    if (mb_detect_encoding($decode_str) === 'ASCII') {
        return $decode_str;
    }
    // Byte-wise replacement (no /u modifier). This replaces the "$str{$i}" offset
    // syntax, which is deprecated in PHP 7.4 and a parse error in PHP 8.
    return preg_replace('/[\x7f-\xff]/', '_', $decode_str);
}
/**
 * Make sure the HTML of a URL is available in a cache file.
 *
 * An existing cache file is reused when it holds more than 5000 bytes and
 * passes detect_char_encoding(); otherwise the URL is requested through
 * get_request_html(), which writes the cache file.
 *
 * @param string $getUrl    absolute URL to fetch
 * @param string $cache_txt path of the cache file
 * @param bool   $is_gzip   forwarded to get_request_html()
 * @return array always has 'code' (0 on success), 'msg' and 'type'
 *               ('file' when served from cache, otherwise the request type)
 */
private function get_html($getUrl,$cache_txt,$is_gzip=false)
{
    $res = array('code' => 0, 'msg'=>'succeed','type'=>null);
    // Return the full result shape: callers read 'type' before checking 'code',
    // and the original early return carried neither a 'msg' nor a 'type' key.
    if (empty($getUrl) || empty($cache_txt)) {
        return array('code' => -100, 'msg' => 'empty get params', 'type' => null);
    }
    // Links scraped from href / data attributes carry HTML-escaped ampersands.
    // The original call replaced '&' with '&', which changed nothing.
    $getUrl = str_replace('&amp;', '&', trim($getUrl));

    if (is_file($cache_txt)) {
        $html = file_get_contents($cache_txt);
        $cache_ok = !empty($html) && strlen($html) >= 5000 && $this->detect_char_encoding($html);
        if ($cache_ok) {
            $res['type'] = 'file';
            return $res;
        }
    }
    return $this->get_request_html($getUrl, $cache_txt, $is_gzip);
}
/**
 * Request a URL and store the response through to_check_codes().
 *
 * Up to three attempts are made, each accepted only when the body is longer
 * than 5000 bytes: file_get_contents(), then curl_get(), then curl_post().
 *
 * @param string $url        absolute URL to request
 * @param string $write_file cache file the response is written to
 * @param bool   $is_gzip    read the first attempt through the compress.zlib://
 *                           wrapper and pass the flag on to curl_get()
 * @return array 'code' (0 on success, -4 when every attempt failed, otherwise
 *               the to_check_codes() code), 'msg', and 'type'
 *               ('func', 'gets', 'post' or 'request')
 */
private function get_request_html($url,$write_file,$is_gzip=false)
{
    $usable = function ($body) {
        return is_string($body) && strlen($body) > 5000;
    };

    // Attempt 1: stream wrapper request.
    $context = stream_context_create(array('http'=>array('method'=>'GET', 'timeout'=>60,)));
    $source  = $is_gzip ? "compress.zlib://".$url : $url;
    $html    = @file_get_contents($source, false, $context);
    if ($usable($html)) {
        $res = $this->to_check_codes($html, $write_file);
        $res['type'] = 'func';
        return $res;
    }

    // Attempt 2: GET through curl_get().
    // NOTE(review): curl_get()/curl_post() are not defined in the visible part of
    // this file; the guards keep a missing helper from being a fatal error.
    if (function_exists('curl_get')) {
        // The original passed "$this->$is_gzip", a variable-property lookup on an
        // undefined property (always null). The flag itself is what was meant -
        // TODO confirm against curl_get()'s signature.
        $html = $is_gzip ? @curl_get($url, $is_gzip) : @curl_get($url);
        if ($usable($html)) {
            $res = $this->to_check_codes($html, $write_file);
            $res['type'] = 'gets';
            return $res;
        }
    }

    // Attempt 3: POST through curl_post().
    if (function_exists('curl_post')) {
        $html = @curl_post($url, array());
        if ($usable($html)) {
            $res = $this->to_check_codes($html, $write_file);
            $res['type'] = 'post';
            return $res;
        }
    }
    return array('code'=>-4,'type'=>'request','msg' =>'');
}
/**
 * Validate a response body and write it to the cache file.
 *
 * The trimmed body is written as-is when it passes detect_char_encoding();
 * otherwise it is run through gzip_decode() and checked again.
 *
 * @param string $html       raw response body
 * @param string $write_file cache file to write
 * @return array 'code' and 'msg': 0 on success, -1 when the plain body could
 *               not be written in full, -2 when the decoded body could not be
 *               written in full, -3 when the body fails the check even after
 *               decoding (the cache file is removed in that case)
 */
public function to_check_codes($html,$write_file)
{
    $body   = trim($html);
    $result = array('code'=>0,'msg'=>'',);

    $plain_ok = $this->detect_char_encoding($body);
    if (!$plain_ok) {
        $body = $this->gzip_decode($body);
        if (!$this->detect_char_encoding($body)) {
            @unlink($write_file);
            $result['code'] = -3;
            $result['msg']  = 'fail gzip coding';
            return $result;
        }
    }

    // A write of fewer than 5000 bytes counts as a failure.
    $written = file_put_contents($write_file, $body);
    if (!$written || $written < 5000) {
        if ($plain_ok) {
            $result['code'] = -1;
            $result['msg']  = 'fail create file';
        } else {
            $result['code'] = -2;
            $result['msg']  = 'fail gzip file';
        }
    }
    return $result;
}
/**
 * Extract the product cards from a cached search-result page.
 *
 * @param string $txt path of the cached listing HTML
 * @return array 'code' (0 on success, -1 when the result container is not
 *               found or is shorter than 500 bytes, -2 when fewer than five
 *               cards are found), 'data' (card fragments, each starting right
 *               after 'data-asin="'), 'msg', and on success 'count'
 */
private function get_page_list($txt)
{
    $result = array( 'code'=>0,'data'=>NULL,'msg'=>'succeed' );
    $source = file_get_contents($txt);

    // Step 1: isolate the result container.
    $slot = NULL;
    preg_match('/\"s-main-slot s-result-list s-search-results sg-row\"(.*?)\"s-result-list-placeholder aok-hidden sg-row\"/is',$source,$slot);
    if (!isset($slot[1]) || strlen($slot[1]) < 500) {
        $result['code'] = -1;
        $result['msg']  = 'error : failed to get location div for page';
        return $result;
    }

    // Step 2: strip the openings of entries with an empty data-asin, then split into cards.
    $listing = preg_replace('/(data-asin=\"\" data-index=\"[^1]+\" class=\"a)/is','',trim($slot[1]));
    $cards = null;
    preg_match_all('/<div data-asin=\"(.*?)class=\"a-section a-spacing-none a-spacing-top-micro/is',$listing,$cards);
    if (!isset($cards[1]) || count($cards[1]) < 5) {
        $result['code'] = -2;
        $result['msg']  = 'error : failed to get list div for page';
        return $result;
    }

    $result['data']  = $cards[1];
    $result['count'] = count($cards[1]);
    return $result;
}
/**
 * Pull the requested fields out of one product card of a listing page.
 *
 * @param string $div    card fragment as returned by get_page_list()
 * @param array  $params names of the fields to extract: any of 'product_link',
 *                       'product_name', 'product_main', 'product_index',
 *                       'product_asin'
 * @return array empty when $params is empty; otherwise all five keys, with
 *               null for every field that was not requested or not found
 */
private function get_page_div_elem($div ,$params)
{
    if(!$params || empty($params )) return array();

    $wanted = array_flip($params);
    // First capture group of $pattern in $subject, or null when it is missing or falsy.
    $capture = function ($pattern, $subject) {
        $m = null;
        preg_match($pattern, $subject, $m);
        return (isset($m[1]) && $m[1]) ? $m[1] : null;
    };
    $link = $name = $asin = $index = $main_img = null;

    // Link and title both live inside the card's title anchor.
    if (isset($wanted['product_name']) || isset($wanted['product_link'])) {
        $anchor = $capture('/<a class=\"a-link-normal a-text-normal(.*?)<\/a/is', $div);
        if ($anchor !== null) {
            $anchor = trim($anchor);
            if (isset($wanted['product_link'])) {
                $href = $capture('/href=\"(.*?)\"/is', $anchor);
                if ($href !== null) $link = self::DEFAULT_DOMAIN. trim($href);
            }
            if (isset($wanted['product_name'])) {
                $span = $capture('/<span class="(.*?)\/span>/is', $anchor);
                if ($span !== null) {
                    $text = $capture('/dir=\"auto\">(.*?)</is', $span);
                    if ($text !== null) $name = trim($text);
                }
            }
        }
    }

    // Main image: the first <img src> of the card.
    if (isset($wanted['product_main'])) {
        $src = $capture('/img src=\"(.*?)\"/is', $div);
        if ($src !== null) $main_img = $this->custom_url(trim($src));
    }

    // ASIN and index are read from the card's opening tag (everything before the first '>').
    if (isset($wanted['product_index']) || isset($wanted['product_asin'])) {
        $head = $capture('/(.*?)>/is', $div);
        if ($head !== null && strlen($head) > 10) {
            $head = trim($head);
            if (isset($wanted['product_asin'])) {
                $value = $capture('/(.*?)\"/is', $head);
                if ($value !== null) $asin = trim($value);
            }
            if (isset($wanted['product_index'])) {
                $value = $capture('/data-index=\"(.*?)\"/is', $head);
                if ($value !== null) $index = trim($value);
            }
        }
    }

    return array('product_main'=>$main_img,'product_link'=>$link,'product_name'=>$name,'product_index'=>$index,'product_asin'=>$asin,);
}
/**
 * Fetch every color variant of a product and record its details and images.
 *
 * For each color the variant page is cached, its price, description and sizes
 * are parsed, and the image URLs returned by get_product_left_images() are
 * recorded. Image files are only saved when self::$is_download is true.
 *
 * @param array  $arr      color entries as built by get_product_center_elem()
 *                         (keys include 'color_id' and 'color_url')
 * @param string $save_dir product directory the variant caches are stored in
 * @return array color records keyed by color id; colors that are already
 *               cached or that failed are left out
 */
private function download_image($arr,$save_dir)
{
    $status = array();
    if (!$arr) return $status;
    $base_dir = rtrim($save_dir, '/').'/';

    foreach ($arr as $item)
    {
        $item['color_url'] = self::DEFAULT_DOMAIN.$item['color_url'];
        $id = $item['color_id'];
        // The original tested an undefined local ($has_data), so cached colors were
        // never skipped; the cache index lives in $this->page_has.
        if (!empty($this->page_has[$this->page][$this->product][$id])) {
            $this->log('Already Has Cache', 7);
            continue;
        }
        $save_path = $base_dir.$id;
        $cache_txt = $save_path.'.txt';
        // The main color shares the product's own cache file.
        if (isset($this->main_id) && $this->main_id == $id) $cache_txt = $base_dir.self::$main_cache_txt;
        // Octal mode; the decimal literal 755 produced unintended permission bits.
        if (self::$is_download && !is_dir($save_path)) mkdir($save_path, 0755, true);

        $is_cache = $this->get_html($item['color_url'], $cache_txt, self::$is_gzip);
        if ($is_cache['code'] != 0) {
            $this->log('Get Color Cache Fail', 7);
            continue;
        }
        $item['color_cache'] = isset($is_cache['type']) ? $is_cache['type'] : null;
        $item['color_dir']   = $save_path.'/';
        $item['color_txt']   = $cache_txt;

        // Price, description and sizes of this color variant.
        $res = $this->get_product_center_elem($cache_txt, array('price'=>1,'desc'=>1,'size'=>1));
        if ($res['status'] != 0 || empty($res['data'])) {
            $this->log('Get Color images Fail'.$res['status'], 7);
            continue;
        }
        $item['color_price'] = $res['data']['price'];
        $item['color_desc']  = $res['data']['desc'];
        $item['color_size']  = $res['data']['size'];

        // Left-column images of this color variant.
        $res = $this->get_product_left_images($cache_txt);
        if ($res['status'] != 0 || empty($res['data'])) {
            // This method checks 'status' on the result; the original logged
            // $res['code'] here.
            $this->log('Get Color images Fail'.$res['status'], 7);
            continue;
        }

        $color_img_down_log = '';
        foreach ($res['data'] as $ty => $val)
        {
            $last_code = '';
            foreach ($val as $i => $img_url) {
                $save_file = null;
                if (self::$is_download) {
                    // Build the name per image; the original appended to one variable,
                    // so the second file name contained the first one.
                    $save_file = $save_path.'/'.$ty.($i + 1).strrchr($img_url, '.');
                    if (!is_file($save_file)) {
                        // presumably returns array('code' => ..., 'size' => ...) - verify
                        // against download_file(), which is outside the visible source.
                        $down = $this->download_file($img_url, $save_file);
                    } else {
                        $down = array('code'=>200,'size'=>(filesize($save_file)/1024).' kb');
                    }
                } else {
                    $down = array('code'=>204,'size'=>0);
                }
                if (isset($down['code']) && $down['code']) {
                    $item[$ty][$i] = array(
                        'image_url'    => $img_url,
                        'image_file'   => self::$is_download ? $save_file : ' ',
                        'image_status' => $down['code'],
                        'image_size'   => self::$is_download ? $down['size'] : ' ',
                    );
                }
                $last_code = isset($down['code']) ? $down['code'] : '';
            }
            // One status per image set: the code of its last image.
            $color_img_down_log .= ' '.$last_code.",";
        }
        $item['image_num'] = (isset($item['image_normal']) && is_array($item['image_normal']))
            ? count($item['image_normal'])
            : 0;
        $this->log($id.' images('.count($res['data']).') '.$color_img_down_log, 7);
        $status[$id] = $item;
    }
    return $status;
}
/**
 * Parse the center column of a cached product page.
 *
 * Only the fields whose key is set to a non-null value in $set_params are
 * extracted. Parsing stops at the first field that cannot be found, and that
 * field's negative status is returned.
 *
 * @param string     $info_txt   path of the cached product HTML
 * @param array|null $set_params fields to extract, keyed by 'name', 'brand',
 *                               'score', 'price', 'desc', 'size', 'color',
 *                               'colors'; defaults to color, colors and name
 * @return array 'status' (0 on success, a negative code otherwise) and 'data'
 *               (the extracted fields, or null)
 */
private function get_product_center_elem($info_txt,$set_params=null)
{
    $res = array( 'status' => 0 , 'data'=> null);
    if(!isset($set_params))
        $set_params = array(
            'color' =>1, 'colors'=>1, 'name' =>1,
            'price' =>null, 'desc' =>null, 'size' =>null,
            'brand' =>null, 'score' =>null
        );
    // An unreadable cache file is reported through the same status as an unusable page.
    $info_html = is_file($info_txt) ? file_get_contents($info_txt) : '';
    if(empty($info_html) || strlen($info_html)<5000 || empty($set_params) || strpos($info_html,'id="centerCol"')===false ) {
        $res['status'] = -200; return $res;
    }
    // Isolate the center column, which holds the product details.
    $arr = null;
    preg_match('/id=\"centerCol\"(.*?)id=\"rightCol\"/is', $info_html, $arr);
    if (!isset($arr[1]) || empty($arr[1]) || strlen($arr[1]) < 500) {
        $res['status'] = -202; return $res;
    }
    $p_str = trim($arr[1]);$info_html = null;
    /*---name----brand------score---------------------------------------------------------
    <div id="titleBlock
    <div id="titleBlockLeftSection"
    <div id="bylineInfo_feature_div"
    <div
    <a id="bylineInfo" (brand) </a>
    </div>
    </div>
    <div>
    <div id="titleBlockRightSection"
    <div id="title_feature_div"
    <div id="titleSection"
    <h1 id="title" <span id="productTitle" (name) <span>
    </div>
    </div>
    <div id="averageCustomerReviews_feature_div"
    <div id="averageCustomerReviews"
    <span id="acrPopover" (score) </span>
    </div>
    </div>
    </div>
    </div>
    <div id="delightPricingBadge_feature_div"
    */
    if(isset($set_params['name']))
    {
        $arr = array();
        preg_match('/span id=\"productTitle\"(.*?)\/span/is', $p_str, $arr);
        if (!isset($arr[1]) || strlen($arr[1]) < 10) {
            $res['status'] = -210;return $res;
        }
        $str = trim($arr[1]); $arr = null;
        preg_match('/\">(.*?)</is', $str, $arr);
        if (!isset($arr[1]) || strlen($arr[1]) < 10) {
            $res['status'] = -210;return $res;
        }
        // Tabs would break the tab-separated export.
        $res['data']['name'] = str_replace("\t",' ',trim($arr[1]));
    }
    if(isset($set_params['brand']))
    {
        $arr = array();
        preg_match('/<a id=\"bylineInfo\"(.*?)\/span>/is', $p_str, $arr);
        if (!isset($arr[1]) || strlen($arr[1]) < 10) {
            $res['status'] = -220;
            return $res;
        }
        $str = trim($arr[1]);
        $arr = null;
        preg_match('/\">(.*?)</is', $str, $arr);
        if (!isset($arr[1]) || strlen($arr[1]) < 10) {
            $res['status'] = -230;
            return $res;
        }
        $res['data']['brand'] = trim($arr[1]);
    }
    if(isset($set_params['score']))
    {
        $arr = array();
        preg_match('/<span id=\"acrPopover\"(.*?)\/a>/is', $p_str, $arr);
        if (!isset($arr[1]) || strlen($arr[1]) < 10) {
            $res['status'] = -240;return $res;
        }
        $str = trim($arr[1]); $arr = null;
        preg_match('/a-icon-alt\">(.*?)</is', $str, $arr);
        if (!isset($arr[1]) || strlen($arr[1]) < 10) {
            $res['status'] = -240;return $res;
        }
        $res['data']['score'] = trim($arr[1]);
    }
    /*---price----------------------------------------------------------------------------------------------
    <div id="desktop_unifiedPrice
    <div id="unifiedPrice_feature_div"
    <div id="price"
    <table
    <tr id="priceblock_ourprice_row"
    <td id="priceblock_ourprice_lbl"
    Price:
    </td>
    td>
    <span id="priceblock_ourprice" (price) </span>
    <span id="priceblock_ourprice" </span>
    </td>
    </tr>
    </table>
    </div>
    <div>
    </div>
    <div id="promoPriceBlockMessage_feature_div"
    */
    if(isset($set_params['price']))
    {
        $arr = null;
        preg_match('/id=\"priceblock_ourprice\"(.*?)\//is', $p_str, $arr); // section ends at id="globalStoreInfoBullets_feature_div"
        if (!isset($arr[1]) || strlen($arr[1]) < 10) {
            $res['status'] = -271; return $res;
        }
        $str = trim($arr[1]); $arr = null;
        preg_match('/\">(.*?)</is', $str, $arr);
        if (!isset($arr[1]) || empty($arr[1])) {
            $res['status'] = -270; return $res;
        }
        $res['data']['price'] = str_replace(array(' ','$'),'',trim($arr[1]));
    }
    /*---desc----------------------------------------------------------------------------------------------
    <div id="featurebullets_feature_div
    <div id="feature-bullets"
    <div id="bylineInfo_feature_div"
    <ul
    <li><span class="a-list-item" (desc) </span></li>
    </ul>
    </div>
    <div>
    </div>
    <div id="globalStoreInfoBullets_feature_div"
    */
    if(isset($set_params['desc']))
    {
        $arr = null;
        preg_match('/<div id=\"featurebullets_feature_div\"(.*?)<\/ul>/is', $p_str, $arr); // section ends at id="globalStoreInfoBullets_feature_div"
        if (!isset($arr[1]) || strlen($arr[1]) < 100) {
            $res['status'] = -250; return $res;
        }
        $str = trim($arr[1]); $arr = null;
        preg_match_all('/a-list-item\">(.*?)<\//is', $str, $arr);
        if (!isset($arr[1]) || empty($arr[1])) {
            $res['status'] = -250; return $res;
        }
        foreach ($arr[1] as $i => $str_s) { $arr[1][$i] = trim($str_s);}
        $res['data']['desc'] = $arr[1];
    }
    /*---colors color size-------------------------------------------------------------
    <div id="softlinesTwister_feature_div
    <div id="twisterContainer"
    <div class="a-section"
    <form
    <div id="variation_size_name"
    <span
    <select
    <option (size) </option>
    </select>
    </span>(brand)
    </div>
    <div id="variation_color_name"
    <div
    <label </label><span class="selection"> (color) </span>
    </div>
    <ul>
    <li (colors) </li>
    </ul>
    </div>
    </form>
    </div>
    <div>
    </div>
    <div id="twister_description_div"
    */
    if(isset($set_params['size']))
    {
        $arr = null;
        preg_match('/id=\"variation_size_name\"(.*?)<\/select>/is', $p_str, $arr); // section ends at id="variation_color_name"
        if (!isset($arr[1]) || empty($arr[1]) ) {
            $res['status'] = -260; return $res;
        }
        $str = trim($arr[1]); $arr = null;
        preg_match_all('/data-a-html-content=\"(.*?)\"/is', $str, $arr); // id="native_dropdown_selected_size_name"
        if (!isset($arr[1]) || empty($arr[1])) {
            $res['status'] = -260; return $res;
        }
        foreach ($arr[1] as $i => $str_s) { $arr[1][$i] = trim($str_s);}
        $res['data']['size'] = $arr[1];
    }
    if(isset($set_params['color']))
    {
        $arr = null;
        preg_match('/id=\"variation_color_name\"(.*?)<script/is', $p_str, $arr); // section ends at id="twister_description_div"
        if (!isset($arr[1]) || strlen($arr[1]) < 10) {
            $res['status'] = -270;return $res;
        }
        $str = trim($arr[1]); $arr = null;
        preg_match('/Color:(.*?)\/span/is', $str, $arr);
        if (!isset($arr[1]) || strlen($arr[1]) < 10) {
            $res['status'] = -270;return $res;
        }
        $str = trim($arr[1]); $arr = null;
        preg_match('/class=\"selection\">(.*?)</is', $str, $arr);
        // NOTE(review): the 10-byte minimum is applied to the untrimmed capture, so it
        // relies on surrounding whitespace in the markup for short color names - confirm.
        if(!isset($arr[1]) || strlen($arr[1]) < 10) {
            $res['status'] = -270;return $res;
        }
        $res['data']['color'] = trim($arr[1]);
    }
    if(isset($set_params['colors']))
    {
        $arr = null;
        preg_match('/id=\"variation_color_name\"(.*?)<script/is', $p_str, $arr); // section ends at id="twister_description_div"
        if (!isset($arr[1]) || strlen($arr[1]) < 100) {
            $res['status'] = -280; return $res;
        }
        $str = trim($arr[1]); $arr = null;
        preg_match_all('/<li(.*?)<\/li>/is', $str, $arr);
        if (!isset($arr[1]) || empty($arr[1])) {
            $res['status'] = -281; return $res;
        }
        // The selected color is only known when 'color' was requested as well.
        $selected_color = isset($res['data']['color']) ? $res['data']['color'] : null;
        $isset_ids = array();
        foreach ($arr[1] as $i => $li_str) {
            if (empty($li_str) || strlen($li_str) < 10 || strpos($li_str, 'src') === false) continue;
            $id = $name = $url = $asin = $thumb = null;
            // Id of the color entry.
            $c_arr = null;
            preg_match('/id=\"(.*?)\"/is', $li_str, $c_arr);
            $id = (isset($c_arr[1]) && !empty($c_arr[1])) ? trim($c_arr[1]) : 'color_'.$i;
            // Name of the color.
            $c_arr = null;
            preg_match('/alt=\"(.*?)\"/is', $li_str, $c_arr);
            if (isset($c_arr[1]) && !empty($c_arr[1])) $name = trim($c_arr[1]);
            // ASIN of the color variant.
            $c_arr = null;
            preg_match('/data-defaultAsin=\"(.*?)\"/is', $li_str, $c_arr);
            if (isset($c_arr[1]) && !empty($c_arr[1])) $asin = trim($c_arr[1]);
            // Thumbnail of the color variant.
            $c_arr = null;
            preg_match('/src=\"(.*?)\"/is', $li_str, $c_arr);
            if (isset($c_arr[1]) && !empty($c_arr[1])) $thumb = $this->custom_url($c_arr[1]);
            // URL of the variant page, used later to find its large images; the variant
            // shown on the current page carries no URL.
            $c_arr = null;
            preg_match('/data-dp-url=\"(.*?)\"/is', $li_str, $c_arr);
            if (isset($c_arr[1]) && !empty($c_arr[1])) $url = trim($c_arr[1]);
            // The URL is optional; every other field is required.
            if (!isset($id) || !isset($name) || !isset($asin) || !isset($thumb)) continue;
            $info = array(
                'color_name'  => $name,
                'color_asin'  => $asin,
                'color_image' => $thumb,
                'color_id'    => $id,
                'color_url'   => $url,
                'color_error' => '',
            );
            // Skip ids that were already collected.
            if(isset($isset_ids[$id])) {
                continue;
            }
            if(!$url) {
                if($selected_color !== null && $name == $selected_color) {
                    $this->main_id = $id;
                }else{
                    $info['color_error']= 'product main model (color) error';
                }
            }
            if($id && isset($this->page_has[$this->page][$this->product][$id])){
                $this->log( 'Page Product Color Has : '.$this->page.' '.$this->product.' '.$id,7);
            }
            $res['data']['colors'][$id] = $info;
            $isset_ids[$id] = true;
        }
        // The original tested empty($res), which can never be true; the intended
        // check is whether any color was collected.
        if(empty($res['data']['colors'])){
            $res['status'] = -281; return $res;
        }
    }
    return $res;
}
/**
 * Extract product image URLs from a locally saved product-detail HTML file.
 *
 * Only the markup between id="leftCol" and id="centerCol" is inspected. Depending on
 * $position, up to three sources inside that fragment are scanned:
 *  - 'left'   : the <script> that mentions "ImageBlockATF"; its "var data ...;" payload is
 *               passed to custom_json() and fills data['image_normal'] (hiRes),
 *               data['image_large'] and data['image_thumb'];
 *  - 'center' : the id="altImages" strip, filling data['images_small'];
 *  - 'first'  : the main image <ul>, filling data['image_first'];
 *  - 'all'    : all of the above.
 * A single "already seen" map keyed by file name is shared by all sections, so an image
 * recorded by an earlier section is not recorded again by a later one.
 *
 * Status codes: 0 ok; -2 file missing, unreadable or shorter than 5000 bytes; -3 left column
 * not found or shorter than 500 bytes; -201/-2011/-2012 script section failures;
 * -202/-2021 thumbnail strip failures; -2031/-2032/-2033 main image list failures.
 * A later section overwrites the status set by an earlier one.
 *
 * @param string $txt      path of the saved HTML file
 * @param string $position 'all', 'left', 'center' or 'first'
 * @return array array('status' => int, 'data' => array)
 */
private function get_product_left_images($txt,$position = 'all')
{
    $res = array('status'=>0,'data'=>array());
    // Verify the path before reading it, so a missing file does not raise a warning.
    if (!is_file($txt)) {
        $res['status'] = -2; return $res;
    }
    $info_color_html = file_get_contents($txt);
    if (empty($info_color_html) || strlen($info_color_html) < 5000) {
        $res['status'] = -2; return $res;
    }
    // Large product images live in the left column (only the currently selected model).
    $p_arr = null;
    preg_match('/id=\"leftCol\"(.*?)id=\"centerCol\"/is', $info_color_html, $p_arr);
    $info_color_html = null; // release the full page as early as possible
    if (!isset($p_arr[1]) || strlen($p_arr[1]) < 500) {
        $res['status'] = -3; return $res;
    }
    $left_str = trim($p_arr[1]);
    $isHas = array(); // file name => 1 for every image already recorded

    if ($position == 'all' || $position == 'left')
    {
        // Collect every image address published by the inline page script.
        $p_arr = null;
        preg_match_all('/<script(.*?)<\/script>/is', $left_str, $p_arr);
        if (isset($p_arr[1]) && !empty($p_arr[1]))
        {
            $arr = array_filter($p_arr[1]);
            $str = null;
            foreach ($arr as $json) {
                if ($json && strpos($json,'ImageBlockATF') !== false) { $str = trim($json); break; }
            }
            $arr = null;
            if (isset($str)) {
                $p_arr = null;
                preg_match('/var data(.*?);/is', $str, $p_arr);
                // Every '=' is removed from the captured payload, not only the assignment sign.
                $str = isset($p_arr[1]) && strlen($p_arr[1]) ? trim(str_replace('=','',$p_arr[1])) : NULL;
                $p_arr = null;
                if ($str) {
                    $p_arr = $this->custom_json($str);
                    if ($p_arr) {
                        foreach ($p_arr as $item) {
                            if (isset($item->hiRes)) {
                                $ur = $this->custom_url($item->hiRes);
                                // NOTE(review): this key is the raw file name while the branches below
                                // use the normalized one — kept as is; confirm whether it is intended.
                                $na = basename($item->hiRes);
                                if (!isset($isHas[$na])) $res['data']['image_normal'][] = $ur;
                                $isHas[$na] = 1;
                            }
                            if (isset($item->large)) {
                                $ur = $this->custom_url($item->large);
                                $na = basename($ur);
                                if (!isset($isHas[$na])) $res['data']['image_large'][] = $ur;
                                $isHas[$na] = 1;
                            }
                            if (isset($item->thumb)) {
                                $ur = $this->custom_url($item->thumb);
                                $na = basename($ur);
                                if (!isset($isHas[$na])) $res['data']['image_thumb'][] = $ur;
                                $isHas[$na] = 1;
                            }
                        }
                    }
                } else {
                    $res['status'] = -2012;
                }
            } else {
                $res['status'] = -2011;
            }
        } else {
            $res['status'] = -201;
        }
    }

    if ($position == 'all' || $position == 'center')
    {
        // Small images from the side strip.
        $p_arr = null;
        preg_match('/id=\"altImages\"(.*?)<script/is',$left_str, $p_arr);
        if (isset($p_arr[1]) && strlen($p_arr[1]) > 500)
        {
            $str = trim($p_arr[1]);
            $p_arr = null;
            preg_match_all('/<li class=\"a-spacing-small item(.*?)<\/li>/is', $str, $p_arr);
            if (isset($p_arr[1]) && !empty($p_arr[1]))
            {
                $arr = $p_arr[1];
                $str = null;
                foreach ($arr as $i => $li)
                {
                    $p_arr = NULL;
                    preg_match('/src=\"(.*?)\"/is',$li,$p_arr);
                    if (isset($p_arr[1]) && strlen($p_arr[1]) > 10)
                    {
                        $ur = $this->custom_url($p_arr[1]);
                        $na = basename($ur);
                        if (!isset($isHas[$na])) $res['data']['images_small'][] = $ur;
                        $isHas[$na] = 1;
                    }
                }
            } else {
                $res['status'] = -2021;
            }
        } else {
            $res['status'] = -202;
        }
    }

    if ($position == 'all' || $position == 'first')
    {
        // Only the first large image is in the static markup; the rest needs a hover action.
        $p_arr = null;
        preg_match('/ul class=\"a-unordered-list a-nostyle a-horizontal list maintain-height(.*?)<\/ul>/is', $left_str, $p_arr);
        if (isset($p_arr[1]) && strlen($p_arr[1]) > 500)
        {
            $str = $p_arr[1];
            $p_arr = null;
            preg_match_all('/<li class=\"image item itemNo(.*?)<\/li>/is', $str, $p_arr);
            if (isset($p_arr[1]) && !empty($p_arr[1]))
            {
                $str = null;
                $arr = $p_arr[1];
                foreach ($arr as $i => $div)
                {
                    $p_arr = null;
                    preg_match('/src=\"(.*?)"/is', $div, $p_arr);
                    if (isset($p_arr[1]) && strlen($p_arr[1]) > 10)
                    {
                        $ur = $this->custom_url(trim($p_arr[1]));
                        $na = basename($ur);
                        if (!isset($isHas[$na])) $res['data']['image_first'][] = $ur;
                        $isHas[$na] = 1;
                    }
                }
                // Nothing collected by any section so far.
                if (empty($res['data'])) $res['status'] = -2033;
            } else {
                $res['status'] = -2032;
            }
        } else {
            $res['status'] = -2031;
        }
    }
    return $res;
}
/**
 * Normalize an image URL by dropping everything between the first and the last dot of
 * its file name (e.g. "abc._AC_UL1500_.jpg" becomes "abc.jpg").
 *
 * Inline "data:image" URIs and any $type other than 1 are returned trimmed but otherwise
 * untouched. A file name without a dot yields null.
 *
 * @param string $url  URL or path to normalize
 * @param int    $type 1 to normalize, anything else to pass the URL through
 * @return string|null
 */
private function custom_url($url,$type=1)
{
    $cleaned = trim($url);
    // Data URIs carry no file name to rewrite, so they always take the pass-through path.
    $mode = (strpos($cleaned, 'data:image') !== false) ? 0 : $type;
    if ($mode != 1) {
        return $cleaned;
    }

    $directory = dirname($cleaned);
    $file = basename($cleaned);
    $firstDot = strpos($file, '.');
    $suffix = strrchr($file, '.');
    if ($firstDot === false || $suffix === false) {
        return null;
    }
    return $directory . '/' . substr($file, 0, $firstDot) . $suffix;
}
/**
 * Turn a JavaScript object literal scraped from a page into decoded JSON and return the
 * value found under $field1 -> $field2 -> $field3.
 *
 * Clean-up applied before decoding: spaces and line breaks are removed, single quotes and
 * the typographic quote "”" become double quotes, trailing commas before "]" are dropped
 * and "A.$.parseJSON(...)" calls are replaced by an empty string literal. Each field is only
 * descended into when it exists; a missing field leaves the current level as the result.
 *
 * The cleaned text is decoded once; the raw input is no longer decoded separately because
 * that result was never used.
 *
 * @param string      $html_json raw script text (inputs shorter than 500 bytes are rejected)
 * @param string|null $field1    first property to descend into
 * @param string|null $field2    second property to descend into
 * @param string|null $field3    third property to descend into
 * @return mixed array() for empty/short input, null when decoding fails or yields an empty
 *               value, otherwise the selected decoded value
 */
private function custom_json($html_json, $field1 = 'colorImages',$field2 = 'initial',$field3 = null)
{
    if (empty($html_json) || strlen($html_json) < 500) {
        return array();
    }

    // The second character of the input is stripped everywhere as well.
    // NOTE(review): presumably this is the line break following the opening brace — confirm.
    $rn = substr($html_json, 1, 1);
    $replacements = array(
        ''  => array(' ', "\n", "\r\n", $rn, PHP_EOL),
        '"' => array("'", '”'),
        ']' => ',]',
    );
    foreach ($replacements as $repl => $find) {
        $html_json = str_replace($find, $repl, $html_json);
    }
    $html_json = preg_replace('/A.\$.parseJSON\((.*?)\)/is', '""', $html_json);
    if (!is_string($html_json)) {
        return null; // preg_replace() failed
    }

    $data = json_decode($html_json);
    if (!$data) {
        return null;
    }
    if ($field1 && isset($data->$field1)) $data = $data->$field1;
    if ($field2 && isset($data->$field2)) $data = $data->$field2;
    if ($field3 && isset($data->$field3)) $data = $data->$field3;
    return $data;
}
/**
 * Detect the encoding of $str among ASCII, GB2312, GBK, UTF-8 and BIG5 and either report
 * whether it matches $default or convert the string.
 *
 * @param string $str     text to inspect
 * @param string $default expected / target encoding name
 * @param bool   $isTrans false: only report a match; true: return the (converted) string
 * @return bool|string with $isTrans false, true when the detected name contains $default;
 *                     with $isTrans true, $str itself on a match, otherwise the converted text
 */
private function detect_char_encoding($str , $default = 'UTF-8' , $isTrans = false)
{
    $detected = mb_detect_encoding($str, array("ASCII","GB2312","GBK","UTF-8","BIG5"));
    $matchesDefault = stripos($detected, $default) !== false;

    if (!$isTrans) {
        return $matchesDefault;
    }
    if ($matchesDefault) {
        return $str;
    }
    if (stripos($detected, 'CP936') !== false) {
        // NOTE(review): this converts from $default to latin1 rather than from CP936 to
        // $default — looks like a deliberate mojibake workaround; confirm before changing.
        return iconv($default, 'latin1//IGNORE', $str);
    }
    return mb_convert_encoding($str, $default, $detected);
}
/**
 * Decode a complete gzip member (RFC 1952) held in memory.
 *
 * The header is parsed by hand: optional FEXTRA, FNAME, FCOMMENT and FHCRC fields are
 * skipped (the header CRC16 is verified when present), the deflate body is inflated and the
 * result is checked against the CRC32 and size stored in the 8-byte trailer.
 *
 * @param string $data raw gzip bytes
 * @return string|null|false the decoded payload; null when $data is not gzip, has reserved
 *                           flag bits set or has no body; false when the stream is malformed,
 *                           uses an unknown compression method or fails verification
 */
public function gzip_decode($data)
{
    $len = strlen($data);
    // Smallest possible member: 10-byte header + 8-byte trailer; magic bytes are 1f 8b.
    if ($len < 18 || strcmp(substr($data, 0, 2), "\x1f\x8b")) {
        return null;
    }
    $method = ord(substr($data, 2, 1)); // CM  - compression method
    $flags  = ord(substr($data, 3, 1)); // FLG - FTEXT=1, FHCRC=2, FEXTRA=4, FNAME=8, FCOMMENT=16
    if (($flags & 0xE0) !== 0) {
        return null; // bits 5-7 are reserved and must be zero
    }
    // Bytes 4-9 (MTIME, XFL, OS) are not needed for decoding.
    $header_len = 10;

    if ($flags & 4) {
        // FEXTRA: 2-byte little-endian length right after the fixed header, then the data.
        if ($len - $header_len - 2 < 8) {
            return false;
        }
        $extra_len = unpack("v", substr($data, $header_len, 2));
        $extra_len = $extra_len[1];
        if ($len - $header_len - 2 - $extra_len < 8) {
            return false;
        }
        $header_len += 2 + $extra_len;
    }

    // FNAME and FCOMMENT are NUL-terminated strings that follow each other in this order.
    foreach (array(8, 16) as $string_flag) {
        if (!($flags & $string_flag)) {
            continue;
        }
        if ($len - $header_len - 1 < 8) {
            return false;
        }
        $nul = strpos($data, chr(0), $header_len);
        if ($nul === false || $len - $nul - 1 < 8) {
            return false;
        }
        $header_len = $nul + 1;
    }

    if ($flags & 2) {
        // FHCRC: low 16 bits of the CRC32 over all header bytes preceding this field.
        if ($len - $header_len - 2 < 8) {
            return false;
        }
        $calc_crc = crc32(substr($data, 0, $header_len)) & 0xffff;
        $header_crc = unpack("v", substr($data, $header_len, 2));
        $header_crc = $header_crc[1];
        if ($header_crc != $calc_crc) {
            return false;
        }
        $header_len += 2;
    }

    // Trailer: CRC32 of the uncompressed data, then its size modulo 2^32.
    $data_crc = unpack("V", substr($data, -8, 4));
    $data_crc = $data_crc[1];
    $i_size = unpack("V", substr($data, -4));
    $i_size = $i_size[1];

    $body_len = $len - $header_len - 8;
    if ($body_len < 1) {
        return null;
    }
    if ($method !== 8) {
        return false; // deflate is the only compression method defined
    }
    $inflated = gzinflate(substr($data, $header_len, $body_len));
    if ($inflated === false) {
        return false;
    }
    // NOTE: the size comparison may be unreliable on 32-bit builds for very large payloads,
    // where the unpacked value can be negative.
    if ($i_size != strlen($inflated) || crc32($inflated) != $data_crc) {
        return false;
    }
    return $inflated;
}
/**
 * Perform an HTTP GET with cURL and return the response body.
 *
 * The request sends "Accept: application/json" and is limited to 5000 ms in total. The handle
 * is released on both the success and the failure path.
 *
 * @param string $url  address to fetch
 * @param bool   $gzip when true, request gzip content encoding and let cURL decode it
 * @return string|bool response body, or "Error:<message>" when cURL reports an error
 */
public function curl_get($url,$gzip = false)
{
    $header = array(
        'Accept: application/json',
    );
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_HEADER, 0);              // keep response headers out of the returned body
    if ($gzip) {
        curl_setopt($curl, CURLOPT_ENCODING, "gzip");
    }
    // A CURLOPT_TIMEOUT of 1 s used to be set before this line; the option set last wins,
    // so only the millisecond value ever applied.
    curl_setopt($curl, CURLOPT_TIMEOUT_MS, 5000);
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);      // return the body instead of printing it
    // NOTE(review): certificate and host verification are disabled — confirm this is intended.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);

    $data = curl_exec($curl);
    $error = curl_error($curl);
    curl_close($curl);
    if ($error) {
        $data = "Error:".$error;
    }
    return $data;
}
/**
 * Perform an HTTP POST with cURL and return the response body.
 *
 * The request sends "Accept: application/json". The handle is released on both the success
 * and the failure path.
 *
 * @param string       $url      address to post to
 * @param array|string $postdata value handed to CURLOPT_POSTFIELDS
 * @return string|bool response body, or "Error:<message>" when cURL reports an error
 */
public function curl_post($url, $postdata)
{
    $header = array(
        'Accept: application/json',
    );
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_HEADER, 0);              // keep response headers out of the returned body
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);      // return the body instead of printing it
    // A CURLOPT_TIMEOUT of 10 s used to be set before this line; the option set last wins,
    // so the effective limit has always been 500 ms.
    // NOTE(review): 500 ms is short for a POST — confirm which of the two values was intended.
    curl_setopt($curl, CURLOPT_TIMEOUT_MS, 500);
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
    // NOTE(review): certificate and host verification are disabled — confirm this is intended.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, FALSE);
    curl_setopt($curl, CURLOPT_POST, 1);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $postdata);

    $data = curl_exec($curl);
    $error = curl_error($curl);
    curl_close($curl);
    if ($error) {
        $data = "Error:".$error;
    }
    return $data;
}
/**
 * Download an image and store it at $file.
 *
 * Only URLs ending in .png, .jpg or .gif (case-sensitive) and inline "data:image" URIs are
 * accepted. The content is fetched with readfile() first and with file_get_contents() as a
 * fallback; writing is tried with fopen()/fwrite() first and with file_put_contents() as a
 * fallback (after one attempt to chmod the target directory to 0755).
 *
 * Result codes: 201 written by the first write strategy, 202 written by the fallback,
 * -100 empty $url or $file, -200 unsupported extension, -300 nothing could be stored.
 *
 * @param string      $url         source address (or local path / data URI)
 * @param string      $file        destination path
 * @param string|null $reafure_url currently unused
 * @return array array('code' => int, 'size' => int bytes written, 0 on failure)
 */
private function download_file($url,$file,$reafure_url=null)
{
    $res = array('code'=>0,'size'=>0);
    if (empty($url) || empty($file)) {
        $res['code'] = -100;
        return $res;
    }
    $ext = strrchr($url,'.');
    if (strpos($url,'data:image') === false && $ext != '.png' && $ext != '.jpg' && $ext != '.gif') {
        $res['code'] = -200;
        return $res;
    }

    // Warnings are suppressed on the stream calls below: failures are expected here and are
    // reported through the result code instead.

    // Attempt 1: capture readfile() output, write through a file handle.
    ob_start();
    @readfile($url);
    $img = ob_get_contents();
    ob_end_clean();
    if ($img && strlen($img) > 10) {
        $fp = @fopen($file, 'w+');
        // Only touch the handle when fopen() succeeded; fwrite(false) is a TypeError on PHP 8.
        if ($fp !== false) {
            $byte = @fwrite($fp, $img);
            @fclose($fp);
            if ($byte && $byte > 10) {
                return array('code'=>201,'size'=>$byte);
            }
        }
    }

    // Attempt 2: fetch again only if nothing usable was read, then write in one call.
    if (!$img || strlen($img) <= 10) {
        $img = @file_get_contents($url);
    }
    if ($img && strlen($img) > 10) {
        $byte = @file_put_contents($file, $img);
        if (!$byte) {
            @chmod(dirname($file), 0755); // octal mode
            $byte = @file_put_contents($file, $img);
        }
        if ($byte && $byte > 10) {
            return array('code'=>202,'size'=>$byte);
        }
    }

    $res['code'] = -300;
    return $res;
}
/**
 * Return the client IP address as reported by the environment.
 *
 * The environment variables HTTP_CLIENT_IP, HTTP_X_FORWARDED_FOR and REMOTE_ADDR are checked
 * in that order and the first non-empty value wins; $_SERVER['REMOTE_ADDR'] is the last
 * resort. When none of them is available (for example on the command line) null is returned
 * without raising an undefined-key warning.
 *
 * NOTE(review): the first two variables are normally derived from request headers a client
 * can set freely — do not rely on the result for access control.
 *
 * @return string|null
 */
public function getIp()
{
    foreach (array('HTTP_CLIENT_IP', 'HTTP_X_FORWARDED_FOR', 'REMOTE_ADDR') as $name) {
        $value = getenv($name);
        if ($value) {
            return $value;
        }
    }
    return isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : null;
}
/**
 * Decode a sequence of decimal character references ("&#72;&#105;") into UTF-8 text.
 *
 * Every occurrence of $prefix is removed, the remainder is split on ";" and each purely
 * numeric token is encoded as one UTF-8 character (1 to 4 bytes). Empty tokens — such as the
 * one after the final ";" — and tokens that are not decimal numbers or exceed U+10FFFF are
 * skipped.
 *
 * @param string $de_str text made of "<prefix><decimal>;" references
 * @param string $prefix marker that precedes each number
 * @return string UTF-8 encoded result
 */
public function ascii_decode($de_str,$prefix='&#')
{
    $utf = '';
    $de_str = str_replace($prefix, "", $de_str);
    foreach (explode(";", $de_str) as $token) {
        $token = trim($token);
        if ($token === '' || !ctype_digit($token)) {
            continue;
        }
        $dec = (int) $token;
        if ($dec < 0x80) {
            $utf .= chr($dec);
        } elseif ($dec < 0x800) {
            $utf .= chr(0xC0 | ($dec >> 6));
            $utf .= chr(0x80 | ($dec & 0x3F));
        } elseif ($dec < 0x10000) {
            $utf .= chr(0xE0 | ($dec >> 12));
            $utf .= chr(0x80 | (($dec >> 6) & 0x3F));
            $utf .= chr(0x80 | ($dec & 0x3F));
        } elseif ($dec <= 0x10FFFF) {
            // Supplementary planes need four bytes.
            $utf .= chr(0xF0 | ($dec >> 18));
            $utf .= chr(0x80 | (($dec >> 12) & 0x3F));
            $utf .= chr(0x80 | (($dec >> 6) & 0x3F));
            $utf .= chr(0x80 | ($dec & 0x3F));
        }
    }
    return $utf;
}
}
空空如也