class="hljs-ln-code"> class="hljs-ln-line">using System.Collections.Generic; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="3"> class="hljs-ln-code"> class="hljs-ln-line">using System.Text; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="4"> class="hljs-ln-code"> class="hljs-ln-line">using System.Collections; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="5"> class="hljs-ln-code"> class="hljs-ln-line">using xu_common.log; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="6"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="7"> class="hljs-ln-code"> class="hljs-ln-line">namespace data_collection.spider_core class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="8"> class="hljs-ln-code"> class="hljs-ln-line">{ class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="9"> class="hljs-ln-code"> class="hljs-ln-line"> class UrlSet class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="10"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="11"> class="hljs-ln-code"> class="hljs-ln-line"> public UrlSet() class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="12"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="13"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="14"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="15"> class="hljs-ln-code"> class="hljs-ln-line"> _going_to_parse = new ArrayList(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="16"> class="hljs-ln-code"> class="hljs-ln-line"> _already_parse = new ArrayList(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="17"> class="hljs-ln-code"> class="hljs-ln-line"> _error_link = new ArrayList(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="18"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="19"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="20"> class="hljs-ln-code"> class="hljs-ln-line"> private static UrlSet __instance = null; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="21"> class="hljs-ln-code"> class="hljs-ln-line"> public static UrlSet instance class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="22"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="23"> class="hljs-ln-code"> class="hljs-ln-line"> get class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="24"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="25"> class="hljs-ln-code"> class="hljs-ln-line"> if (__instance == null) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="26"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="27"> class="hljs-ln-code"> class="hljs-ln-line"> __instance = new UrlSet(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="28"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="29"> class="hljs-ln-code"> class="hljs-ln-line"> return __instance; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="30"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="31"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="32"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="33"> class="hljs-ln-code"> class="hljs-ln-line"> private ArrayList _going_to_parse = null; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="34"> class="hljs-ln-code"> class="hljs-ln-line"> private ArrayList _already_parse = null; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="35"> class="hljs-ln-code"> class="hljs-ln-line"> private ArrayList _error_link = null; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="36"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="37"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="38"> class="hljs-ln-code"> class="hljs-ln-line"> private bool is_url_parsed(string url, bool add) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="39"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="40"> class="hljs-ln-code"> class="hljs-ln-line"> bool rv; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="41"> class="hljs-ln-code"> class="hljs-ln-line"> lock (_already_parse.SyncRoot) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="42"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="43"> class="hljs-ln-code"> class="hljs-ln-line"> rv = _already_parse.Contains(url); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="44"> class="hljs-ln-code"> class="hljs-ln-line"> if (!rv && add) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="45"> class="hljs-ln-code"> class="hljs-ln-line"> _already_parse.Add(url); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="46"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="47"> class="hljs-ln-code"> class="hljs-ln-line"> return rv; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="48"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="49"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="50"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="51"> class="hljs-ln-code"> class="hljs-ln-line"> private bool is_url_parsed(string url) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="52"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="53"> class="hljs-ln-code"> class="hljs-ln-line"> return is_url_parsed(url, false); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="54"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="55"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="56"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="57"> class="hljs-ln-code"> class="hljs-ln-line"> private bool is_url_going_to_parse(string url, bool add) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="58"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="59"> class="hljs-ln-code"> class="hljs-ln-line"> bool rv; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="60"> class="hljs-ln-code"> class="hljs-ln-line"> lock (_going_to_parse.SyncRoot) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="61"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="62"> class="hljs-ln-code"> class="hljs-ln-line"> rv = _going_to_parse.Contains(url); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="63"> class="hljs-ln-code"> class="hljs-ln-line"> if (!rv && add) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="64"> class="hljs-ln-code"> class="hljs-ln-line"> _going_to_parse.Add(url); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="65"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="66"> class="hljs-ln-code"> class="hljs-ln-line"> return rv; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="67"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="68"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="69"> class="hljs-ln-code"> class="hljs-ln-line"> private bool is_url_going_to_parse(string url) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="70"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="71"> class="hljs-ln-code"> class="hljs-ln-line"> return is_url_going_to_parse(url, false); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="72"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="73"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="74"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="75"> class="hljs-ln-code"> class="hljs-ln-line"> private bool is_url_error_lnk(string url, bool add) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="76"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="77"> class="hljs-ln-code"> class="hljs-ln-line"> bool rv; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="78"> class="hljs-ln-code"> class="hljs-ln-line"> lock (_error_link.SyncRoot) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="79"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="80"> class="hljs-ln-code"> class="hljs-ln-line"> rv = _error_link.Contains(url); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="81"> class="hljs-ln-code"> class="hljs-ln-line"> if (!rv && add) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="82"> class="hljs-ln-code"> class="hljs-ln-line"> _already_parse.Add(url); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="83"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="84"> class="hljs-ln-code"> class="hljs-ln-line"> return rv; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="85"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="86"> class="hljs-ln-code"> class="hljs-ln-line"> private bool is_url_error_lnk(string url) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="87"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="88"> class="hljs-ln-code"> class="hljs-ln-line"> return is_url_error_lnk(url, false); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="89"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="90"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="91"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="92"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="93"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="94"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="95"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="96"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="97"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="98"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="99"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="100"> class="hljs-ln-code"> class="hljs-ln-line"> public int add_going_parse_url(string url) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="101"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="102"> class="hljs-ln-code"> class="hljs-ln-line"> lock (_going_to_parse.SyncRoot) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="103"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="104"> class="hljs-ln-code"> class="hljs-ln-line"> if (is_url_parsed(url)) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="105"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="106"> class="hljs-ln-code"> class="hljs-ln-line"> return -1; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="107"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="108"> class="hljs-ln-code"> class="hljs-ln-line"> if (is_url_error_lnk(url)) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="109"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="110"> class="hljs-ln-code"> class="hljs-ln-line"> return -2; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="111"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="112"> class="hljs-ln-code"> class="hljs-ln-line"> if (is_url_going_to_parse(url, true)) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="113"> class="hljs-ln-code"> class="hljs-ln-line"> return 1; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="114"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="115"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="116"> class="hljs-ln-code"> class="hljs-ln-line"> return 0; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="117"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="118"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="119"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="120"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="121"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="122"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="123"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="124"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="125"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="126"> class="hljs-ln-code"> class="hljs-ln-line"> public int add_parsed_url(string url) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="127"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="128"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="129"> class="hljs-ln-code"> class="hljs-ln-line"> if (is_url_going_to_parse(url)) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="130"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="131"> class="hljs-ln-code"> class="hljs-ln-line"> _going_to_parse.Remove(url); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="132"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="133"> class="hljs-ln-code"> class="hljs-ln-line"> if (is_url_error_lnk(url)) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="134"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="135"> class="hljs-ln-code"> class="hljs-ln-line"> _error_link.Remove(url); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="136"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="137"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="138"> class="hljs-ln-code"> class="hljs-ln-line"> if (is_url_parsed(url, true)) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="139"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="140"> class="hljs-ln-code"> class="hljs-ln-line"> return -1; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="141"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="142"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="143"> class="hljs-ln-code"> class="hljs-ln-line"> return 0; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="144"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="145"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="146"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="147"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="148"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="149"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="150"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="151"> class="hljs-ln-code"> class="hljs-ln-line"> public int add_error_url(string url) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="152"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="153"> class="hljs-ln-code"> class="hljs-ln-line"> if (is_url_going_to_parse(url)) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="154"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="155"> class="hljs-ln-code"> class="hljs-ln-line"> _going_to_parse.Remove(url); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="156"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="157"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="158"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="159"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="160"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="161"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="162"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="163"> class="hljs-ln-code"> class="hljs-ln-line"> if (is_url_error_lnk(url, true)) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="164"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="165"> class="hljs-ln-code"> class="hljs-ln-line"> return -1; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="166"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="167"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="168"> class="hljs-ln-code"> class="hljs-ln-line"> return 0; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="169"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="170"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="171"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="172"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="173"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="174"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="175"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="176"> class="hljs-ln-code"> class="hljs-ln-line"> public bool pop_going_to_parse_url(ref string url) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="177"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="178"> class="hljs-ln-code"> class="hljs-ln-line"> url = ""; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="179"> class="hljs-ln-code"> class="hljs-ln-line"> bool rv = false; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="180"> class="hljs-ln-code"> class="hljs-ln-line"> lock (_going_to_parse.SyncRoot) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="181"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="182"> class="hljs-ln-code"> class="hljs-ln-line"> if (_going_to_parse.Count <= 0) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="183"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="184"> class="hljs-ln-code"> class="hljs-ln-line"> rv = false; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="185"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="186"> class="hljs-ln-code"> class="hljs-ln-line"> else class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="187"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="188"> class="hljs-ln-code"> class="hljs-ln-line"> url = _going_to_parse[0].ToString(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="189"> class="hljs-ln-code"> class="hljs-ln-line"> _going_to_parse.RemoveAt(0); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="190"> class="hljs-ln-code"> class="hljs-ln-line"> rv = true; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="191"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="192"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="193"> class="hljs-ln-code"> class="hljs-ln-line"> return rv; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="194"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="195"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="196"> class="hljs-ln-code"> class="hljs-ln-line"> public int going_to_parse_url_num() class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="197"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="198"> class="hljs-ln-code"> class="hljs-ln-line"> int ret = 0; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="199"> class="hljs-ln-code"> class="hljs-ln-line"> lock (_going_to_parse.SyncRoot) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="200"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="201"> class="hljs-ln-code"> class="hljs-ln-line"> ret = _going_to_parse.Count; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="202"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="203"> class="hljs-ln-code"> class="hljs-ln-line"> return ret; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="204"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="205"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="206"> class="hljs-ln-code"> class="hljs-ln-line"> private string[] _no_parse_keyword = null; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="207"> class="hljs-ln-code"> class="hljs-ln-line"> private int _no_parse_type = 3; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="208"> class="hljs-ln-code"> class="hljs-ln-line"> public void SetNoParseKeyWord(string str, string split, int type) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="209"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="210"> class="hljs-ln-code"> class="hljs-ln-line"> _no_parse_keyword = xu_common.CommonOperator.Split(str, split); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="211"> class="hljs-ln-code"> class="hljs-ln-line"> _no_parse_type = type; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="212"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="213"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="214"> class="hljs-ln-code"> class="hljs-ln-line"> public bool IsNoParse(string url) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="215"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="216"> class="hljs-ln-code"> class="hljs-ln-line"> LogMsg.LogError(url + ", no_parse_type="+ _no_parse_type.ToString()); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="217"> class="hljs-ln-code"> class="hljs-ln-line"> if (_no_parse_type == 1) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="218"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="219"> class="hljs-ln-code"> class="hljs-ln-line"> for (int i = 0; i < _no_parse_keyword.Length; i++) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="220"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="221"> class="hljs-ln-code"> class="hljs-ln-line"> if (url.Contains(_no_parse_keyword[i])) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="222"> class="hljs-ln-code"> class="hljs-ln-line"> return false; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="223"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="224"> class="hljs-ln-code"> class="hljs-ln-line"> return true; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="225"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="226"> class="hljs-ln-code"> class="hljs-ln-line"> else if(_no_parse_type==2) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="227"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="228"> class="hljs-ln-code"> class="hljs-ln-line"> for (int i = 0; i < _no_parse_keyword.Length; i++) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="229"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="230"> class="hljs-ln-code"> class="hljs-ln-line"> if (url.Contains(_no_parse_keyword[i])) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="231"> class="hljs-ln-code"> class="hljs-ln-line"> return true; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="232"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="233"> class="hljs-ln-code"> class="hljs-ln-line"> return false; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="234"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="235"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="236"> class="hljs-ln-code"> class="hljs-ln-line"> return false; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="237"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="238"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="239"> class="hljs-ln-code"> class="hljs-ln-line"> #region write back to file: ToString class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="240"> class="hljs-ln-code"> class="hljs-ln-line"> public string GoingParseToString() class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="241"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="242"> class="hljs-ln-code"> class="hljs-ln-line"> string ret = ""; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="243"> class="hljs-ln-code"> class="hljs-ln-line"> int count = _going_to_parse.Count; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="244"> class="hljs-ln-code"> class="hljs-ln-line"> for (int i = 0; i < count-1; i++) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="245"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="246"> class="hljs-ln-code"> class="hljs-ln-line"> ret += _going_to_parse[i].ToString() + "\r\n"; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="247"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="248"> class="hljs-ln-code"> class="hljs-ln-line"> if (count > 0) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="249"> class="hljs-ln-code"> class="hljs-ln-line"> ret += _going_to_parse[count - 1].ToString(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="250"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="251"> class="hljs-ln-code"> class="hljs-ln-line"> _going_to_parse.Clear(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="252"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="253"> class="hljs-ln-code"> class="hljs-ln-line"> return ret; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="254"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="255"> class="hljs-ln-code"> class="hljs-ln-line"> public string AlreadyParsedToString() class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="256"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="257"> class="hljs-ln-code"> class="hljs-ln-line"> string ret = ""; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="258"> class="hljs-ln-code"> class="hljs-ln-line"> int count = _already_parse.Count; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="259"> class="hljs-ln-code"> class="hljs-ln-line"> for (int i = 0; i < count - 1; i++) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="260"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="261"> class="hljs-ln-code"> class="hljs-ln-line"> ret += _already_parse[i].ToString() + "\r\n"; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="262"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="263"> class="hljs-ln-code"> class="hljs-ln-line"> if (count > 0) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="264"> class="hljs-ln-code"> class="hljs-ln-line"> ret += _already_parse[count - 1].ToString(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="265"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="266"> class="hljs-ln-code"> class="hljs-ln-line"> _already_parse.Clear(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="267"> class="hljs-ln-code"> class="hljs-ln-line"> return ret; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="268"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="269"> class="hljs-ln-code"> class="hljs-ln-line"> public string ErrorUrlToString() class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="270"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="271"> class="hljs-ln-code"> class="hljs-ln-line"> string ret = ""; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="272"> class="hljs-ln-code"> class="hljs-ln-line"> int count = _error_link.Count; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="273"> class="hljs-ln-code"> class="hljs-ln-line"> for (int i = 0; i < count - 1; i++) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="274"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="275"> class="hljs-ln-code"> class="hljs-ln-line"> ret += _error_link[i].ToString() + "\r\n"; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="276"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="277"> class="hljs-ln-code"> class="hljs-ln-line"> if (count > 0) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="278"> class="hljs-ln-code"> class="hljs-ln-line"> ret += _error_link[count - 1].ToString(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="279"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="280"> class="hljs-ln-code"> class="hljs-ln-line"> _error_link.Clear(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="281"> class="hljs-ln-code"> class="hljs-ln-line"> return ret; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="282"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="283"> class="hljs-ln-code"> class="hljs-ln-line"> #endregion class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="284"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="285"> class="hljs-ln-code"> class="hljs-ln-line">} class="hide-preCode-box">
class="hljs-button signin active" data-title="登录复制" data-report-click="{"spm":"1001.2101.3001.4334"}" onclick="hljs.signin(event)">
二、下载和解析网页
以下代码是一个线程抓取文件(非网页内容,如图片、文件)的主要代码:设置一个URL,抓取后把URL放到已抓取队列,并保存网页内容到文件中; 抓取失败,则把URL放到错误队列。
有些注释掉的 MessageBox是博主当时调试用的,大家可忽略。
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="1"> class="hljs-ln-code"> class="hljs-ln-line">namespace data_collection.spider_core
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="2"> class="hljs-ln-code"> class="hljs-ln-line">{
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="3"> class="hljs-ln-code"> class="hljs-ln-line">
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="4"> class="hljs-ln-code"> class="hljs-ln-line">
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="5"> class="hljs-ln-code"> class="hljs-ln-line"> class DownFileTask : xu_common.thread.Task
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="6"> class="hljs-ln-code"> class="hljs-ln-line"> {
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="7"> class="hljs-ln-code"> class="hljs-ln-line"> public override void Run()
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="8"> class="hljs-ln-code"> class="hljs-ln-line"> {
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="9"> class="hljs-ln-code"> class="hljs-ln-line"> FileStream fileStream = new FileStream(_filepath, FileMode.Append|FileMode.Create, FileAccess.Write);
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="10"> class="hljs-ln-code"> class="hljs-ln-line"> Stream inStream = null;
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="11"> class="hljs-ln-code"> class="hljs-ln-line"> try
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="12"> class="hljs-ln-code"> class="hljs-ln-line"> {
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="13"> class="hljs-ln-code"> class="hljs-ln-line">
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="14"> class="hljs-ln-code"> class="hljs-ln-line"> HttpWebRequest myre = (HttpWebRequest)WebRequest.Create(_url);
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="15"> class="hljs-ln-code"> class="hljs-ln-line">
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="16"> class="hljs-ln-code"> class="hljs-ln-line"> if (fileStream.Length == myre.ContentLength)
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="17"> class="hljs-ln-code"> class="hljs-ln-line"> {
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="18"> class="hljs-ln-code"> class="hljs-ln-line">
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="19"> class="hljs-ln-code"> class="hljs-ln-line"> return;
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="20"> class="hljs-ln-code"> class="hljs-ln-line"> }
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="21"> class="hljs-ln-code"> class="hljs-ln-line">
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="22"> class="hljs-ln-code"> class="hljs-ln-line"> myre.AddRange(Convert.ToInt32(fileStream.Length));
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="23"> class="hljs-ln-code"> class="hljs-ln-line"> HttpWebResponse response = (HttpWebResponse)myre.GetResponse();
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="24"> class="hljs-ln-code"> class="hljs-ln-line"> inStream = response.GetResponseStream();
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="25"> class="hljs-ln-code"> class="hljs-ln-line">
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="26"> class="hljs-ln-code"> class="hljs-ln-line">
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="27"> class="hljs-ln-code"> class="hljs-ln-line"> int length = 1024;
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="28"> class="hljs-ln-code"> class="hljs-ln-line"> byte[] buffer = new byte[1025];
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="29"> class="hljs-ln-code"> class="hljs-ln-line"> int readerLength = 0, currentLength = 0;
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="30"> class="hljs-ln-code"> class="hljs-ln-line"> while ((readerLength = inStream.Read(buffer, 0, length)) > 0)
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="31"> class="hljs-ln-code"> class="hljs-ln-line"> {
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="32"> class="hljs-ln-code"> class="hljs-ln-line"> currentLength += readerLength;
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="33"> class="hljs-ln-code"> class="hljs-ln-line"> fileStream.Write(buffer, 0, readerLength);
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="34"> class="hljs-ln-code"> class="hljs-ln-line">
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="35"> class="hljs-ln-code"> class="hljs-ln-line">
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="36"> class="hljs-ln-code"> class="hljs-ln-line"> fileStream.Flush();
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="37"> class="hljs-ln-code"> class="hljs-ln-line"> }
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="38"> class="hljs-ln-code"> class="hljs-ln-line">
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="39"> class="hljs-ln-code"> class="hljs-ln-line"> fileStream.Close();
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="40"> class="hljs-ln-code"> class="hljs-ln-line"> inStream.Close();
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="41"> class="hljs-ln-code"> class="hljs-ln-line">
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="42"> class="hljs-ln-code"> class="hljs-ln-line">
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="43"> class="hljs-ln-code"> class="hljs-ln-line">
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="44"> class="hljs-ln-code"> class="hljs-ln-line">
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="45"> class="hljs-ln-code"> class="hljs-ln-line"> UrlSet.instance.add_parsed_url(_url);
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="46"> class="hljs-ln-code"> class="hljs-ln-line"> }
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="47"> class="hljs-ln-code"> class="hljs-ln-line"> catch (Exception ex)
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="48"> class="hljs-ln-code"> class="hljs-ln-line"> {
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="49"> class="hljs-ln-code"> class="hljs-ln-line"> xu_common.log.LogMsg.LogError("down file:" + _url + ", error.msg:" + ex.ToString());
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="50"> class="hljs-ln-code"> class="hljs-ln-line">
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="51"> class="hljs-ln-code"> class="hljs-ln-line"> UrlSet.instance.add_error_url(_url);
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="52"> class="hljs-ln-code"> class="hljs-ln-line"> }
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="53"> class="hljs-ln-code"> class="hljs-ln-line"> }
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="54"> class="hljs-ln-code"> class="hljs-ln-line">
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="55"> class="hljs-ln-code"> class="hljs-ln-line"> public void SetUrl(string url) { _url = url; }
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="56"> class="hljs-ln-code"> class="hljs-ln-line"> private string _url;
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="57"> class="hljs-ln-code"> class="hljs-ln-line">
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="58"> class="hljs-ln-code"> class="hljs-ln-line"> public void SetFilePath(string filepath) { _filepath = filepath; }
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="59"> class="hljs-ln-code"> class="hljs-ln-line"> private string _filepath;
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="60"> class="hljs-ln-code"> class="hljs-ln-line"> }
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="61"> class="hljs-ln-code"> class="hljs-ln-line">}
class="hide-preCode-box">
class="hljs-button signin active" data-title="登录复制" data-report-click="{"spm":"1001.2101.3001.4334"}" onclick="hljs.signin(event)">
以上是下载文件和图像的代码。
------------------------------------------
以下代码是下载网页的核心代码。
1.初始化代抓取的URL,设置浏览器代理(欺骗对应网站)。
2.下载URL对应的网页内容。
3.从URL的内容中解析结构化内容(根据规则),同时根据网页内容,解析到很多URL(网页的外链)。
3.1 如果URL是需要下载的文件或图片,则新起一个线程,用上面的代码下载。
3.2 如果URL是要继续抓取的网页URL,则放到待爬取链接。
注(重要):此处,抓取到的内容(保存为_content)的解析,这里博主写了一个规则处理器。所以下面代码一句话带过。规则处理器比较简单,根据下载的网站不同。比如你想下载 http://xx.com/a/1.html,网站的内容,一般是通用的,我们就可以设置一个规则:只要满足URL为http://xx.com/a/*.html的,则按以下规则处理:跳过………………等代码,直到找到内容开始处,这里需要我们实际看一下http://xx.com/a/1.html的源码,一般是或
等特征开始。我们把这些内容跳过即可。
- class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="1">
class="hljs-ln-code"> class="hljs-ln-line">using System; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="2"> class="hljs-ln-code"> class="hljs-ln-line">using System.Collections.Generic; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="3"> class="hljs-ln-code"> class="hljs-ln-line">using System.Text; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="4"> class="hljs-ln-code"> class="hljs-ln-line">using System.Text.RegularExpressions; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="5"> class="hljs-ln-code"> class="hljs-ln-line">using System.Net; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="6"> class="hljs-ln-code"> class="hljs-ln-line">using xu_common.log; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="7"> class="hljs-ln-code"> class="hljs-ln-line">using System.Collections; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="8"> class="hljs-ln-code"> class="hljs-ln-line">using System.IO; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="9"> class="hljs-ln-code"> class="hljs-ln-line">using System.IO.Compression; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="10"> class="hljs-ln-code"> class="hljs-ln-line">using System.Security.Policy; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="11"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="12"> class="hljs-ln-code"> class="hljs-ln-line">namespace data_collection.spider_core class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="13"> class="hljs-ln-code"> class="hljs-ln-line">{ class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="14"> class="hljs-ln-code"> class="hljs-ln-line"> class WebGetHtml class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="15"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="16"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="17"> class="hljs-ln-code"> class="hljs-ln-line"> public WebGetHtml(string parse_url) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="18"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="19"> class="hljs-ln-code"> class="hljs-ln-line"> _url = parse_url; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="20"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="21"> class="hljs-ln-code"> class="hljs-ln-line"> string _url = null; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="22"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="23"> class="hljs-ln-code"> class="hljs-ln-line"> string _tag_base_url = null; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="24"> class="hljs-ln-code"> class="hljs-ln-line"> string _title; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="25"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="26"> class="hljs-ln-code"> class="hljs-ln-line"> string _base_url = null; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="27"> class="hljs-ln-code"> class="hljs-ln-line"> string _base_top_url = null; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="28"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="29"> class="hljs-ln-code"> class="hljs-ln-line"> string _content; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="30"> class="hljs-ln-code"> class="hljs-ln-line"> private void GenBaseUrl() class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="31"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="32"> class="hljs-ln-code"> class="hljs-ln-line"> if (_tag_base_url == null) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="33"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="34"> class="hljs-ln-code"> class="hljs-ln-line"> _base_top_url = global_var.RegTopUrl.Match(_url).Value; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="35"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="36"> class="hljs-ln-code"> class="hljs-ln-line"> string not_param_url = null; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="37"> class="hljs-ln-code"> class="hljs-ln-line"> int qPisition = this._url.IndexOf("?"); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="38"> class="hljs-ln-code"> class="hljs-ln-line"> if (qPisition < 0) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="39"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="40"> class="hljs-ln-code"> class="hljs-ln-line"> not_param_url = _url; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="41"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="42"> class="hljs-ln-code"> class="hljs-ln-line"> else class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="43"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="44"> class="hljs-ln-code"> class="hljs-ln-line"> not_param_url = _url.Substring(0, qPisition); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="45"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="46"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="47"> class="hljs-ln-code"> class="hljs-ln-line"> _base_url = Regex.Replace(not_param_url, "(?<=.*/)[^/]*$", "", RegexOptions.Compiled | RegexOptions.IgnoreCase); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="48"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="49"> class="hljs-ln-code"> class="hljs-ln-line"> else class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="50"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="51"> class="hljs-ln-code"> class="hljs-ln-line"> _base_top_url = global_var.RegTopUrl.Match(_tag_base_url).Value; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="52"> class="hljs-ln-code"> class="hljs-ln-line"> _base_url = _tag_base_url; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="53"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="54"> class="hljs-ln-code"> class="hljs-ln-line"> if (_base_url.EndsWith("/")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="55"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="56"> class="hljs-ln-code"> class="hljs-ln-line"> _base_url = _base_url.Substring(0, _base_url.Length - 1); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="57"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="58"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="59"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="60"> class="hljs-ln-code"> class="hljs-ln-line"> public class UrlType class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="61"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="62"> class="hljs-ln-code"> class="hljs-ln-line"> public static int UrlTypeImg = 1; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="63"> class="hljs-ln-code"> class="hljs-ln-line"> public static int UrlTypeFile = 2; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="64"> class="hljs-ln-code"> class="hljs-ln-line"> public static int UrlTypeHtml = 3; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="65"> class="hljs-ln-code"> class="hljs-ln-line"> public static int UrlTypeError = 4; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="66"> class="hljs-ln-code"> class="hljs-ln-line"> public static int UrlTypeSelf = 5; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="67"> class="hljs-ln-code"> class="hljs-ln-line"> public static int UrlTypeOtherFile = 6; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="68"> class="hljs-ln-code"> class="hljs-ln-line"> }; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="69"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="70"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="71"> class="hljs-ln-code"> class="hljs-ln-line"> private int CheckUrl(string UrltoCheck) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="72"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="73"> class="hljs-ln-code"> class="hljs-ln-line"> if (Regex.IsMatch(UrltoCheck, "^#*$", RegexOptions.IgnoreCase | RegexOptions.Compiled)) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="74"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeError; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="75"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck == _url || (UrltoCheck + "/") == _url || UrltoCheck == (_url + "/")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="76"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeSelf; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="77"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".css")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="78"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeOtherFile; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="79"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".wmv")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="80"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeFile; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="81"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".asf")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="82"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeFile; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="83"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".mp3")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="84"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeFile; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="85"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".avi")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="86"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeFile; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="87"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".mpg")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="88"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeFile; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="89"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".mpeg")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="90"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeFile; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="91"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".rmvb")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="92"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeFile; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="93"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".rm")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="94"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeFile; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="95"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".doc")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="96"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeFile; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="97"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".rar")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="98"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeFile; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="99"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".zip")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="100"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeFile; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="101"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".tar")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="102"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeFile; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="103"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".xls")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="104"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeFile; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="105"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".pdf")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="106"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeFile; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="107"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".jpg")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="108"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeImg; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="109"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".jpeg")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="110"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeImg; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="111"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".ico")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="112"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeImg; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="113"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".gif")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="114"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeImg; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="115"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".bmp")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="116"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeImg; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="117"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.EndsWith(".png")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="118"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeImg; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="119"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.StartsWith("ftp://")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="120"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeError; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="121"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.StartsWith("telnet://")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="122"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeError; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="123"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.StartsWith("mms://")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="124"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeError; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="125"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.StartsWith("rstp://")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="126"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeError; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="127"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.StartsWith("mailto")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="128"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeError; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="129"> class="hljs-ln-code"> class="hljs-ln-line"> else if (UrltoCheck.StartsWith("javascript")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="130"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeError; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="131"> class="hljs-ln-code"> class="hljs-ln-line"> else class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="132"> class="hljs-ln-code"> class="hljs-ln-line"> return UrlType.UrlTypeHtml; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="133"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="134"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="135"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="136"> class="hljs-ln-code"> class="hljs-ln-line"> private bool CheckUrlThisSite(string NewUrltoCheck) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="137"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="138"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="139"> class="hljs-ln-code"> class="hljs-ln-line"> return global_var.Instance.IsInSite(NewUrltoCheck); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="140"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="141"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="142"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="143"> class="hljs-ln-code"> class="hljs-ln-line"> private string GenUrl(string incomeUrl) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="144"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="145"> class="hljs-ln-code"> class="hljs-ln-line"> if (incomeUrl.StartsWith("http://")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="146"> class="hljs-ln-code"> class="hljs-ln-line"> return incomeUrl; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="147"> class="hljs-ln-code"> class="hljs-ln-line"> else class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="148"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="149"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="150"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="151"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="152"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="153"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="154"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="155"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="156"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="157"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="158"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="159"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="160"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="161"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="162"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="163"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="164"> class="hljs-ln-code"> class="hljs-ln-line"> if (incomeUrl.StartsWith("/")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="165"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="166"> class="hljs-ln-code"> class="hljs-ln-line"> string trueUrl = _base_top_url + incomeUrl; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="167"> class="hljs-ln-code"> class="hljs-ln-line"> return trueUrl; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="168"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="169"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="170"> class="hljs-ln-code"> class="hljs-ln-line"> int parent_depth = 0; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="171"> class="hljs-ln-code"> class="hljs-ln-line"> while (incomeUrl.StartsWith(".")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="172"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="173"> class="hljs-ln-code"> class="hljs-ln-line"> if (incomeUrl.StartsWith("../")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="174"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="175"> class="hljs-ln-code"> class="hljs-ln-line"> parent_depth += 1; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="176"> class="hljs-ln-code"> class="hljs-ln-line"> incomeUrl.Substring(3, incomeUrl.Length - 3); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="177"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="178"> class="hljs-ln-code"> class="hljs-ln-line"> else if (incomeUrl.StartsWith("./")) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="179"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="180"> class="hljs-ln-code"> class="hljs-ln-line"> incomeUrl = incomeUrl.Substring(2, incomeUrl.Length - 2); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="181"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="182"> class="hljs-ln-code"> class="hljs-ln-line"> else class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="183"> class="hljs-ln-code"> class="hljs-ln-line"> return null; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="184"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="185"> class="hljs-ln-code"> class="hljs-ln-line"> string head_str = _base_url; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="186"> class="hljs-ln-code"> class="hljs-ln-line"> if (parent_depth > 0) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="187"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="188"> class="hljs-ln-code"> class="hljs-ln-line"> for (int i = 0; i < parent_depth; i++) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="189"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="190"> class="hljs-ln-code"> class="hljs-ln-line"> int qposition = head_str.LastIndexOf("/"); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="191"> class="hljs-ln-code"> class="hljs-ln-line"> if (qposition < 0) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="192"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="193"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="194"> class="hljs-ln-code"> class="hljs-ln-line"> head_str = _base_top_url; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="195"> class="hljs-ln-code"> class="hljs-ln-line"> break; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="196"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="197"> class="hljs-ln-code"> class="hljs-ln-line"> head_str = head_str.Substring(0, qposition); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="198"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="199"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="200"> class="hljs-ln-code"> class="hljs-ln-line"> if (head_str.StartsWith("http:") && head_str.Length < "http://".Length) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="201"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="202"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="203"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="204"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="205"> class="hljs-ln-code"> class="hljs-ln-line"> head_str = _base_top_url; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="206"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="207"> class="hljs-ln-code"> class="hljs-ln-line"> return head_str + "/" + incomeUrl; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="208"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="209"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="210"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="211"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="212"> class="hljs-ln-code"> class="hljs-ln-line"> private bool WebGetContent() class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="213"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="214"> class="hljs-ln-code"> class="hljs-ln-line"> HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(_url); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="215"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="216"> class="hljs-ln-code"> class="hljs-ln-line"> myRequest.UserAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36"; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="217"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="218"> class="hljs-ln-code"> class="hljs-ln-line"> HttpWebResponse myResponse = (HttpWebResponse)myRequest.GetResponse(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="219"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="220"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="221"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="222"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="223"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="224"> class="hljs-ln-code"> class="hljs-ln-line"> if (myResponse.StatusCode != HttpStatusCode.OK) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="225"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="226"> class="hljs-ln-code"> class="hljs-ln-line"> LogMsg.LogError("get url:" + _url + ", error. status:"+myResponse.StatusDescription); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="227"> class="hljs-ln-code"> class="hljs-ln-line"> return false; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="228"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="229"> class="hljs-ln-code"> class="hljs-ln-line"> Stream src_stream = myResponse.GetResponseStream(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="230"> class="hljs-ln-code"> class="hljs-ln-line"> StreamReader myStreamReader = null; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="231"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="232"> class="hljs-ln-code"> class="hljs-ln-line"> if (myResponse.ContentEncoding != null && class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="233"> class="hljs-ln-code"> class="hljs-ln-line"> myResponse.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase)) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="234"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="235"> class="hljs-ln-code"> class="hljs-ln-line"> myStreamReader = new StreamReader(new GZipStream(src_stream, CompressionMode.Decompress), class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="236"> class="hljs-ln-code"> class="hljs-ln-line"> Encoding.Default); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="237"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="238"> class="hljs-ln-code"> class="hljs-ln-line"> else class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="239"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="240"> class="hljs-ln-code"> class="hljs-ln-line"> myStreamReader = new StreamReader(src_stream, class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="241"> class="hljs-ln-code"> class="hljs-ln-line"> Encoding.Default); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="242"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="243"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="244"> class="hljs-ln-code"> class="hljs-ln-line"> ArrayList response_bytes_array = new ArrayList(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="245"> class="hljs-ln-code"> class="hljs-ln-line"> int i_byte = 0; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="246"> class="hljs-ln-code"> class="hljs-ln-line"> while (true) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="247"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="248"> class="hljs-ln-code"> class="hljs-ln-line"> i_byte = myStreamReader.BaseStream.ReadByte(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="249"> class="hljs-ln-code"> class="hljs-ln-line"> if (i_byte == -1) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="250"> class="hljs-ln-code"> class="hljs-ln-line"> break; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="251"> class="hljs-ln-code"> class="hljs-ln-line"> response_bytes_array.Add(Convert.ToByte(i_byte)); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="252"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="253"> class="hljs-ln-code"> class="hljs-ln-line"> byte[] response_bytes = new byte[response_bytes_array.Count]; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="254"> class="hljs-ln-code"> class="hljs-ln-line"> for(int i=0;i class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="255"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="256"> class="hljs-ln-code"> class="hljs-ln-line"> response_bytes[i] = Convert.ToByte( response_bytes_array[i]); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="257"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="258"> class="hljs-ln-code"> class="hljs-ln-line"> string tmphtml = Encoding.Default.GetString(response_bytes); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="259"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="260"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="261"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="262"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="263"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="264"> class="hljs-ln-code"> class="hljs-ln-line"> string encoding_name = ""; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="265"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="266"> class="hljs-ln-code"> class="hljs-ln-line"> if (global_var.RegCharset.IsMatch(tmphtml)) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="267"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="268"> class="hljs-ln-code"> class="hljs-ln-line"> encoding_name = global_var.RegCharset.Match(tmphtml).Groups["charset"].Value; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="269"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="270"> class="hljs-ln-code"> class="hljs-ln-line"> else if (myResponse.CharacterSet != string.Empty) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="271"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="272"> class="hljs-ln-code"> class="hljs-ln-line"> encoding_name = myResponse.CharacterSet; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="273"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="274"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="275"> class="hljs-ln-code"> class="hljs-ln-line"> Encoding encoding = null; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="276"> class="hljs-ln-code"> class="hljs-ln-line"> if (encoding_name != null) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="277"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="278"> class="hljs-ln-code"> class="hljs-ln-line"> try class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="279"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="280"> class="hljs-ln-code"> class="hljs-ln-line"> encoding = Encoding.GetEncoding(encoding_name); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="281"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="282"> class="hljs-ln-code"> class="hljs-ln-line"> catch class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="283"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="284"> class="hljs-ln-code"> class="hljs-ln-line"> encoding = Encoding.Default; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="285"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="286"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="287"> class="hljs-ln-code"> class="hljs-ln-line"> else class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="288"> class="hljs-ln-code"> class="hljs-ln-line"> encoding = Encoding.Default; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="289"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="290"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="291"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="292"> class="hljs-ln-code"> class="hljs-ln-line"> _content = encoding.GetString(response_bytes); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="293"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="294"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="295"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="296"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="297"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="298"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="299"> class="hljs-ln-code"> class="hljs-ln-line"> myStreamReader.Close(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="300"> class="hljs-ln-code"> class="hljs-ln-line"> myResponse.Close(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="301"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="302"> class="hljs-ln-code"> class="hljs-ln-line"> return true; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="303"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="304"> class="hljs-ln-code"> class="hljs-ln-line"> public void GetCode() class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="305"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="306"> class="hljs-ln-code"> class="hljs-ln-line"> try class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="307"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="308"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="309"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="310"> class="hljs-ln-code"> class="hljs-ln-line"> bool get_html = WebGetContent(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="311"> class="hljs-ln-code"> class="hljs-ln-line"> if (!get_html) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="312"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="313"> class="hljs-ln-code"> class="hljs-ln-line"> UrlSet.instance.add_error_url(_url); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="314"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="315"> class="hljs-ln-code"> class="hljs-ln-line"> LogMsg.LogInfo("url: [" + _url + "] error!"); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="316"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="317"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="318"> class="hljs-ln-code"> class="hljs-ln-line"> return; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="319"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="320"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="321"> class="hljs-ln-code"> class="hljs-ln-line"> _content = _content.Replace("&", "&"); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="322"> class="hljs-ln-code"> class="hljs-ln-line"> if (global_var.RegBaseUrl.IsMatch(_content)) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="323"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="324"> class="hljs-ln-code"> class="hljs-ln-line"> _tag_base_url = global_var.RegBaseUrl.Match(_content).Value; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="325"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="326"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="327"> class="hljs-ln-code"> class="hljs-ln-line"> GenBaseUrl(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="328"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="329"> class="hljs-ln-code"> class="hljs-ln-line"> _title = global_var.RegTitle.Match(_content).Value; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="330"> class="hljs-ln-code"> class="hljs-ln-line"> LogMsg.LogDebug("url: [" + _url + "] title: [" + _title + "]!"); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="331"> class="hljs-ln-code"> class="hljs-ln-line"> if (_title == "") class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="332"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="333"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="334"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="335"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="336"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="337"> class="hljs-ln-code"> class="hljs-ln-line"> bool save = RoleSet.Instance.OSave(_url, _content, _title); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="338"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="339"> class="hljs-ln-code"> class="hljs-ln-line"> MatchCollection myMC = global_var.RegAnchor.Matches(_content); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="340"> class="hljs-ln-code"> class="hljs-ln-line"> int num = 0; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="341"> class="hljs-ln-code"> class="hljs-ln-line"> foreach (Match i in myMC) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="342"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="343"> class="hljs-ln-code"> class="hljs-ln-line"> if (i.Value == "") class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="344"> class="hljs-ln-code"> class="hljs-ln-line"> continue; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="345"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="346"> class="hljs-ln-code"> class="hljs-ln-line"> string newUrl = i.Value; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="347"> class="hljs-ln-code"> class="hljs-ln-line"> int url_type = CheckUrl(newUrl); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="348"> class="hljs-ln-code"> class="hljs-ln-line"> if (url_type == UrlType.UrlTypeError || class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="349"> class="hljs-ln-code"> class="hljs-ln-line"> url_type == UrlType.UrlTypeOtherFile || class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="350"> class="hljs-ln-code"> class="hljs-ln-line"> url_type == UrlType.UrlTypeSelf) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="351"> class="hljs-ln-code"> class="hljs-ln-line"> continue; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="352"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="353"> class="hljs-ln-code"> class="hljs-ln-line"> newUrl = GenUrl(newUrl); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="354"> class="hljs-ln-code"> class="hljs-ln-line"> if (url_type == UrlType.UrlTypeHtml) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="355"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="356"> class="hljs-ln-code"> class="hljs-ln-line"> bool pass = CheckUrlThisSite(newUrl); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="357"> class="hljs-ln-code"> class="hljs-ln-line"> if (pass && !UrlSet.instance.IsNoParse(newUrl)) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="358"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="359"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="360"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="361"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="362"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="363"> class="hljs-ln-code"> class="hljs-ln-line"> num++; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="364"> class="hljs-ln-code"> class="hljs-ln-line"> UrlSet.instance.add_going_parse_url(newUrl); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="365"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="366"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="367"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="368"> class="hljs-ln-code"> class="hljs-ln-line"> else class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="369"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="370"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="371"> class="hljs-ln-code"> class="hljs-ln-line"> Role role = RoleSet.Instance.OIsMatchRole(newUrl); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="372"> class="hljs-ln-code"> class="hljs-ln-line"> if (role != null) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="373"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="374"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="375"> class="hljs-ln-code"> class="hljs-ln-line"> DownFileTask downfile_task = new DownFileTask(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="376"> class="hljs-ln-code"> class="hljs-ln-line"> downfile_task.SetUrl(newUrl); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="377"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="378"> class="hljs-ln-code"> class="hljs-ln-line"> string save_as_path = role.GetSaveAsPath(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="379"> class="hljs-ln-code"> class="hljs-ln-line"> string filename = newUrl.GetHashCode().ToString("X"); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="380"> class="hljs-ln-code"> class="hljs-ln-line"> filename += "_"+Path.GetFileName(newUrl); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="381"> class="hljs-ln-code"> class="hljs-ln-line"> save_as_path += filename; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="382"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="383"> class="hljs-ln-code"> class="hljs-ln-line"> downfile_task.SetFilePath(save_as_path); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="384"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="385"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="386"> class="hljs-ln-code"> class="hljs-ln-line"> UrlSet.instance.add_going_parse_url(newUrl); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="387"> class="hljs-ln-code"> class="hljs-ln-line"> xu_common.thread.ThreadPoll.Execute(downfile_task); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="388"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="389"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="390"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="391"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="392"> class="hljs-ln-code"> class="hljs-ln-line"> MatchCollection myMC2 = global_var.RegAnchor2.Matches(_content); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="393"> class="hljs-ln-code"> class="hljs-ln-line"> foreach (Match i in myMC2) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="394"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="395"> class="hljs-ln-code"> class="hljs-ln-line"> if (i.Value == "") class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="396"> class="hljs-ln-code"> class="hljs-ln-line"> continue; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="397"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="398"> class="hljs-ln-code"> class="hljs-ln-line"> string newUrl = "http://" + i.Value; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="399"> class="hljs-ln-code"> class="hljs-ln-line"> int url_type = CheckUrl(newUrl); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="400"> class="hljs-ln-code"> class="hljs-ln-line"> if (url_type == UrlType.UrlTypeError || class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="401"> class="hljs-ln-code"> class="hljs-ln-line"> url_type == UrlType.UrlTypeOtherFile || class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="402"> class="hljs-ln-code"> class="hljs-ln-line"> url_type == UrlType.UrlTypeSelf) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="403"> class="hljs-ln-code"> class="hljs-ln-line"> continue; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="404"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="405"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="406"> class="hljs-ln-code"> class="hljs-ln-line"> if (url_type == UrlType.UrlTypeHtml) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="407"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="408"> class="hljs-ln-code"> class="hljs-ln-line"> bool pass = CheckUrlThisSite(newUrl); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="409"> class="hljs-ln-code"> class="hljs-ln-line"> if (pass && !UrlSet.instance.IsNoParse(newUrl)) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="410"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="411"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="412"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="413"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="414"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="415"> class="hljs-ln-code"> class="hljs-ln-line"> num++; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="416"> class="hljs-ln-code"> class="hljs-ln-line"> UrlSet.instance.add_going_parse_url(newUrl); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="417"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="418"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="419"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="420"> class="hljs-ln-code"> class="hljs-ln-line"> else class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="421"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="422"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="423"> class="hljs-ln-code"> class="hljs-ln-line"> Role role = RoleSet.Instance.OIsMatchRole(newUrl); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="424"> class="hljs-ln-code"> class="hljs-ln-line"> if (role != null) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="425"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="426"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="427"> class="hljs-ln-code"> class="hljs-ln-line"> DownFileTask downfile_task = new DownFileTask(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="428"> class="hljs-ln-code"> class="hljs-ln-line"> downfile_task.SetUrl(newUrl); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="429"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="430"> class="hljs-ln-code"> class="hljs-ln-line"> string save_as_path = role.GetSaveAsPath(); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="431"> class="hljs-ln-code"> class="hljs-ln-line"> string filename = newUrl.GetHashCode().ToString("X"); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="432"> class="hljs-ln-code"> class="hljs-ln-line"> filename += "_" + Path.GetFileName(newUrl); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="433"> class="hljs-ln-code"> class="hljs-ln-line"> save_as_path += filename; class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="434"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="435"> class="hljs-ln-code"> class="hljs-ln-line"> downfile_task.SetFilePath(save_as_path); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="436"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="437"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="438"> class="hljs-ln-code"> class="hljs-ln-line"> UrlSet.instance.add_going_parse_url(newUrl); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="439"> class="hljs-ln-code"> class="hljs-ln-line"> xu_common.thread.ThreadPoll.Execute(downfile_task); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="440"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="441"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="442"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="443"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="444"> class="hljs-ln-code"> class="hljs-ln-line"> UrlSet.instance.add_parsed_url(_url); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="445"> class="hljs-ln-code"> class="hljs-ln-line"> LogMsg.LogDebug(_url + ", have:" + num.ToString() + " urls, now going parse url num:" + UrlSet.instance.going_to_parse_url_num() + (save ? "save" : "nosave")); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="446"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="447"> class="hljs-ln-code"> class="hljs-ln-line"> catch(Exception ex) class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="448"> class="hljs-ln-code"> class="hljs-ln-line"> { class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="449"> class="hljs-ln-code"> class="hljs-ln-line"> LogMsg.LogError("error:" + ex.ToString()); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="450"> class="hljs-ln-code"> class="hljs-ln-line"> UrlSet.instance.add_error_url(_url); class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="451"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="452"> class="hljs-ln-code"> class="hljs-ln-line"> class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="453"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="454"> class="hljs-ln-code"> class="hljs-ln-line"> } class="hljs-ln-numbers"> class="hljs-ln-line hljs-ln-n" data-line-number="455"> class="hljs-ln-code"> class="hljs-ln-line">} class="hide-preCode-box">
class="hljs-button signin active" data-title="登录复制" data-report-click="{"spm":"1001.2101.3001.4334"}" onclick="hljs.signin(event)">
三、管理和调度任务
这里管理和调度任务不再赘述,如上面下载文件和图片一样,可以使用多线程进行下载。这些管理代码不难,且上面已有示例,下载网页的任务调度类似。保存在Url队列中,按顺序启动多线程去获取内容。
四、保存数据
这块代码比较隐私。博主在上面的代码处也有说明。是根据对应的网站,在获取网页源码内容后,需要自己写规则解析,解析到对应的内容后,直接调用DB操作类写入到数据库。
以上就是本次分享的内容。如果有兴趣的,欢迎关注博主私聊。
博主其它经典原创:《管理心得--工作目标应该是解决业务问题,而非感动自己》,《管理心得--如何高效进行跨部门合作》,《管理心得--员工最容易犯的错误:以错误去掩盖错误》,《技术心得--如何成为优秀的架构师》、《管理心得--如何成为优秀的架构师》、《管理心理--程序员如何选择职业赛道》。欢迎大家阅读。
>>
评论记录:
回复评论: