{"id":166541,"date":"2013-01-22T00:53:03","date_gmt":"2013-01-21T20:53:03","guid":{"rendered":"http:\/\/savepearlharbor.com\/?p=166541"},"modified":"-0001-11-30T00:00:00","modified_gmt":"-0001-11-29T21:00:00","slug":"","status":"publish","type":"post","link":"https:\/\/savepearlharbor.com\/?p=166541","title":{"rendered":"<span class=\"post_title\">\u041f\u0430\u0440\u0441\u0438\u043c \u0441\u043f\u0438\u0441\u043e\u043a \u0432\u0430\u043a\u0430\u043d\u0441\u0438\u0439 \u0441\u0440\u0435\u0434\u0441\u0442\u0432\u0430\u043c\u0438 Python<\/span>"},"content":{"rendered":"<div class=\"content html_format\">   \t\u041d\u0430 \u0425\u0430\u0431\u0440\u0435 \u0443\u0436\u0435 \u0431\u044b\u043b\u0430 \u0437\u0430\u043c\u0435\u0442\u043a\u0430, \u043a\u0430\u043a <a href=\"http:\/\/habrahabr.ru\/post\/112325\/\">\u0432\u044b\u0442\u0430\u0449\u0438\u0442\u044c \u0438 \u0440\u0430\u0441\u043f\u0430\u0440\u0441\u0438\u0442\u044c \u0441\u043f\u0438\u0441\u043e\u043a \u0432\u0430\u043a\u0430\u043d\u0441\u0438\u0439 \u0441 hh.ru<\/a>.<\/p>\n<p>  \u041e\u0434\u043d\u0430\u043a\u043e \u0442\u0430\u043c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043b\u0441\u044f C# \u2014 \u044f\u0437\u044b\u043a \u0441\u043b\u043e\u0436\u043d\u044b\u0445 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0439 \u0441 \u0445\u0438\u0442\u0440\u044b\u043c\u0438 \u0438\u043d\u0442\u0435\u0440\u0444\u0435\u0439\u0441\u0430\u043c\u0438. 
\u0427\u0442\u043e \u0436\u0435 \u043a\u0430\u0441\u0430\u0435\u0442\u0441\u044f \u043e\u0431\u0440\u0430\u0431\u043e\u0442\u043a\u0438 \u0431\u043e\u043b\u044c\u0448\u0438\u0445 \u00ab\u043f\u0440\u043e\u0441\u0442\u044b\u043d\u0435\u0439\u00bb \u043e\u0434\u043d\u043e\u0442\u0438\u043f\u043d\u044b\u0445 \u0434\u0430\u043d\u043d\u044b\u0445, \u0442\u043e \u0434\u043b\u044f \u043c\u0435\u043b\u043a\u0438\u0445 \u0437\u0430\u0434\u0430\u0447 \u0432 \u044d\u0442\u043e\u0439 \u043e\u0431\u043b\u0430\u0441\u0442\u0438 \u0438\u0441\u043f\u043e\u043a\u043e\u043d \u0432\u0435\u043a\u0443 \u043f\u0440\u0438\u043c\u0435\u043d\u044f\u044e\u0442 \u0441\u043a\u0440\u0438\u043f\u0442\u043e\u0432\u044b\u0435 \u044f\u0437\u044b\u043a\u0438.<\/p>\n<p>  \u0412\u043e\u0442 \u0438 \u043d\u0430\u043f\u0438\u0441\u0430\u043b\u0441\u044f \u043d\u0435\u0431\u043e\u043b\u044c\u0448\u043e\u0439 \u0441\u043a\u0440\u0438\u043f\u0442 \u043d\u0430 python, \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u0443\u0442\u0430\u0441\u043a\u0438\u0432\u0430\u0435\u0442 \u0441 hh.ru \u0441\u043f\u0438\u0441\u043e\u043a \u0432\u0430\u043a\u0430\u043d\u0441\u0438\u0439 \u043f\u043e \u043d\u0435\u043a\u043e\u0442\u043e\u0440\u043e\u043c\u0443 \u0437\u0430\u043f\u0440\u043e\u0441\u0443 (\u043f\u0438\u0448\u0435\u0442\u0441\u044f \u0432 \u043f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u0443\u044e searchParam) \u0438 \u0432\u044b\u0432\u043e\u0434\u0438\u0442 \u0438\u0445 \u0432 \u0444\u043e\u0440\u043c\u0430\u0442\u0435 <a href=\"http:\/\/www.rfc-editor.org\/rfc\/rfc4180.txt\">CSV<\/a>. 
\u041c\u043e\u0436\u043d\u043e \u0432\u044b\u0432\u043e\u0434\u0438\u0442\u044c \u043d\u0430 \u044d\u043a\u0440\u0430\u043d \u0438 \u0443\u0434\u0438\u0432\u043b\u044f\u0442\u044c\u0441\u044f, \u043c\u043e\u0436\u043d\u043e \u043f\u0435\u0440\u0435\u043d\u0430\u043f\u0440\u0430\u0432\u0438\u0442\u044c \u0432 output.csv \u0438 \u043e\u0442\u043a\u0440\u044b\u0442\u044c \u0432 \u043a\u0430\u043a\u043e\u043c-\u043d\u0438\u0431\u0443\u0434\u044c \u0442\u0430\u0431\u043b\u0438\u0447\u043d\u043e\u043c \u0440\u0435\u0434\u0430\u043a\u0442\u043e\u0440\u0435.<br \/>  <a name=\"habracut\"><\/a><br \/>  \u0412\u0441\u0451 \u043d\u0430\u043f\u0438\u0441\u0430\u043d\u043e \u043e\u043a\u043e\u043b\u043e \u043f\u043e\u043b\u0443\u0433\u043e\u0434\u0430 \u043d\u0430\u0437\u0430\u0434 \u043d\u0430 Python 3.x \u0438 \u0442\u0435\u0441\u0442\u0438\u0440\u043e\u0432\u0430\u043b\u043e\u0441\u044c \u0438\u0437-\u043f\u043e\u0434 Windows. \u0414\u043e \u0441\u0438\u0445 \u043f\u043e\u0440 \u0441 \u0443\u0434\u043e\u0432\u043e\u043b\u044c\u0441\u0442\u0432\u0438\u0435\u043c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u044e \u043a\u0430\u043a \u0431\u043e\u043b\u0432\u0430\u043d\u043a\u0443 \u0434\u043b\u044f \u0441\u043a\u0440\u0438\u043f\u0442\u0430, \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u0434\u043e\u043b\u0436\u0435\u043d \u043e\u0442\u043a\u0443\u0434\u0430-\u0442\u043e \u0447\u0442\u043e-\u0442\u043e \u0441\u043a\u0430\u0447\u0430\u0442\u044c.<\/p>\n<p>  \u0421\u0442\u0440\u0430\u043d\u043d\u044b\u0439 \u0441\u043f\u043e\u0441\u043e\u0431 \u0432\u044b\u0445\u043e\u0434\u0430 \u0438\u0437 \u0446\u0438\u043a\u043b\u0430 \u0441\u0432\u044f\u0437\u0430\u043d \u0441 \u043c\u043e\u0436\u0435\u0442 \u0431\u044b\u0442\u044c \u0431\u0430\u0433\u043e\u0439, \u0430 \u043c\u043e\u0436\u0435\u0442 \u0431\u044b\u0442\u044c \u0444\u0438\u0447\u0435\u0439 hh.ru: \u0435\u0441\u043b\u0438 \u0442\u044b \u043f\u043e\u043f\u044b\u0442\u0430\u0435\u0448\u044c\u0441\u044f 
\u043e\u0442\u043a\u0440\u044b\u0442\u044c \u043d\u0435\u0441\u0443\u0449\u0435\u0441\u0442\u0432\u0443\u044e\u0449\u0443\u044e \u0441\u0442\u0440\u0430\u043d\u0438\u0446\u0443 \u0441\u043f\u0438\u0441\u043a\u0430 \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u043e\u0432, \u0440\u0430\u0437\u0431\u0438\u0442\u043e\u0433\u043e paging-\u043e\u043c, \u0442\u043e \u0432\u043c\u0435\u0441\u0442\u043e 404 \u043e\u043d \u043f\u043e\u043a\u0430\u0436\u0435\u0442 \u0441\u0442\u0430\u043d\u0434\u0430\u0440\u0442\u043d\u0443\u044e \u0441\u0442\u0440\u0430\u043d\u0438\u0446\u0443, \u043d\u043e \u0443\u0436\u0435 \u0431\u0435\u0437 \u0432\u0430\u043a\u0430\u043d\u0441\u0438\u0439.<\/p>\n<pre><code class=\"python\">import urllib.parse import re def LoadPageText(url):   import urllib.request   from urllib.error import HTTPError,URLError   opener = urllib.request.build_opener()   try: pageSource = opener.open(url)   except HTTPError as e: return None   except URLError as e: return None   return pageSource.read().decode('utf-8') searchUrl = &quot;http:\/\/hh.ru\/applicant\/searchvacancyresult.xml?text={0}&amp;page={1}&amp;professionalAreaId=0&amp;desireableCompensation=&amp;compensationCurrencyCode=RUR&quot; searchParam = &quot;C++&quot; regExpElemSource = r'&lt;div class=&quot;searchresult__name&quot;&gt;&lt;span class=&quot;b-marker&quot;&gt;&lt;a.*?href=&quot;(?P&lt;vacancy_url&gt;http:\/\/(hh|career)\\.ru\/vacancy\/(?P&lt;vacancy_id&gt;\\d+)(\\?query=.*?)?|http\\:\/\/[a-zA-Z0-9\\-\\.]+\\.[a-zA-Z]{2,3}(\/\\S*)?)&quot;.*?&gt;(?P&lt;title&gt;[ \\(\\)\u0430-\u044f\\w\/,.#+-]*)&lt;\/a&gt;&lt;\/span&gt;(&lt;.*?&gt;)*?&lt;\/div&gt;(&lt;div class=&quot;b-vacancy-list-nosalary&quot;&gt;[\\s\/\u0430-\u044f.]*&lt;\/div&gt;|&lt;div class=&quot;b-vacancy-list-salary&quot;&gt;\\s*(\u043e\u0442\\s*(?P&lt;from&gt;[\\d]*(\\s*[\\d]+){0,2}))?\\s*(\u0434\u043e\\s*(?P&lt;to&gt;[\\d]*(\\s*[\\d]+){0,2})\\s*)?(?P&lt;currency&gt;[\\w\u0430-\u044f.]+)\\s*&lt;\/div&gt;)&lt;div 
class=&quot;searchresult__placetime&quot;&gt;&lt;a href=&quot;(?P&lt;company_url&gt;\/employer\/(?P&lt;company_id&gt;\\d*))&quot;&gt;(?P&lt;company&gt;[ \\(\\)\u0430-\u044f\\w\/,.#+-]+)&lt;\/a&gt;\\s*&lt;span class=&quot;searchresult__address&quot;&gt;\\s*\\((?P&lt;city&gt;[ \u0430-\u044f-]*)(,&lt;br\/&gt;&lt;span style=&quot;color:#[a-f\\d]*&quot;&gt;\\s*\u043c.\\s*(?P&lt;underground_station&gt;[ \u0430-\u044f-]*)&lt;\/span&gt;)?\\),\\s*&lt;\/span&gt;\\s*&lt;span class=&quot;b-vacancy-list-date&quot;&gt;\\s*(?P&lt;date_day&gt;\\d+)\\s+(?P&lt;date_month&gt;[\u0430-\u044f]+)' regExpElem = re.compile(regExpElemSource, re.IGNORECASE) isFirst = True i = 0 while True :   i += 1   searchUrlPrep = searchUrl.format(urllib.parse.quote_plus(searchParam), i)   pageText = LoadPageText(searchUrlPrep)   if pageText != None:     hasElements = False     for elemVacancy in re.finditer(regExpElem, pageText):       hasElements = True       if isFirst:         [print('&quot;%s&quot;' % x, end=&quot;;&quot;) for x in elemVacancy.groupdict().keys()]         isFirst = False       print()       [print((elemVacancy.group(key) if key == &quot;from&quot; and key == &quot;to&quot; else str(elemVacancy.group(key)).replace('\\u00A0', ' ')) if elemVacancy.group(key) != None else &quot;0&quot;, end=&quot;;&quot;) for iterate, key in enumerate(elemVacancy.groupdict())]     if not hasElements: break <\/code><\/pre>\n<p>  \u041b\u044e\u0431\u043e\u043f\u044b\u0442\u0441\u0442\u0432\u0443\u044e\u0449\u0438\u043c \u0431\u0443\u0434\u0435\u0442 \u0438\u043d\u0442\u0435\u0440\u0435\u0441\u043d\u043e <a href=\"http:\/\/code.google.com\/p\/job-list-grabber\/\">\u043f\u043e\u0441\u0435\u0442\u0438\u0442\u044c \u043e\u0444\u0438\u0446\u0438\u0430\u043b\u044c\u043d\u0443\u044e \u0441\u0442\u0440\u0430\u043d\u0438\u0446\u0443 \u0441\u043a\u0440\u0438\u043f\u0442\u0430<\/a>, \u0430 \u043d\u0435\u0443\u0433\u043e\u043c\u043e\u043d\u043d\u044b\u043c \u2014 <a 
href=\"https:\/\/github.com\/rikkimongoose\/JobListGrabber\">\u0441\u0434\u0435\u043b\u0430\u0442\u044c \u0444\u043e\u0440\u043a \u043d\u0430 GitHub<\/a> \u0438 \u043f\u0435\u0440\u0435\u0434\u0435\u043b\u0430\u0442\u044c \u0435\u0433\u043e \u0432\u043e \u0447\u0442\u043e-\u043d\u0438\u0431\u0443\u0434\u044c \u0441\u043e\u0432\u0435\u0440\u0448\u0435\u043d\u043d\u043e \u043d\u0435\u0432\u043e\u043e\u0431\u0440\u0430\u0437\u0438\u043c\u043e\u0435.      \t \t\t   \t<\/p>\n<div class=\"clear\"><\/div>\n<\/p><\/div>\n<p> \u0441\u0441\u044b\u043b\u043a\u0430 \u043d\u0430 \u043e\u0440\u0438\u0433\u0438\u043d\u0430\u043b \u0441\u0442\u0430\u0442\u044c\u0438 <a href=\"http:\/\/habrahabr.ru\/post\/166541\/\"> http:\/\/habrahabr.ru\/post\/166541\/<\/a><\/p>\n","protected":false},"excerpt":{"rendered":"<div class=\"content html_format\">   \t\u041d\u0430 \u0425\u0430\u0431\u0440\u0435 \u0443\u0436\u0435 \u0431\u044b\u043b\u0430 \u0437\u0430\u043c\u0435\u0442\u043a\u0430, \u043a\u0430\u043a <a href=\"http:\/\/habrahabr.ru\/post\/112325\/\">\u0432\u044b\u0442\u0430\u0449\u0438\u0442\u044c \u0438 \u0440\u0430\u0441\u043f\u0430\u0440\u0441\u0438\u0442\u044c \u0441\u043f\u0438\u0441\u043e\u043a \u0432\u0430\u043a\u0430\u043d\u0441\u0438\u0439 \u0441 hh.ru<\/a>.<\/p>\n<p>  \u041e\u0434\u043d\u0430\u043a\u043e \u0442\u0430\u043c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043b\u0441\u044f C# \u2014 \u044f\u0437\u044b\u043a \u0441\u043b\u043e\u0436\u043d\u044b\u0445 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0439 \u0441 \u0445\u0438\u0442\u0440\u044b\u043c\u0438 \u0438\u043d\u0442\u0435\u0440\u0444\u0435\u0439\u0441\u0430\u043c\u0438. 
\u0427\u0442\u043e \u0436\u0435 \u043a\u0430\u0441\u0430\u0435\u0442\u0441\u044f \u043e\u0431\u0440\u0430\u0431\u043e\u0442\u043a\u0438 \u0431\u043e\u043b\u044c\u0448\u0438\u0445 \u00ab\u043f\u0440\u043e\u0441\u0442\u044b\u043d\u0435\u0439\u00bb \u043e\u0434\u043d\u043e\u0442\u0438\u043f\u043d\u044b\u0445 \u0434\u0430\u043d\u043d\u044b\u0445, \u0442\u043e \u0434\u043b\u044f \u043c\u0435\u043b\u043a\u0438\u0445 \u0437\u0430\u0434\u0430\u0447 \u0432 \u044d\u0442\u043e\u0439 \u043e\u0431\u043b\u0430\u0441\u0442\u0438 \u0438\u0441\u043f\u043e\u043a\u043e\u043d \u0432\u0435\u043a\u0443 \u043f\u0440\u0438\u043c\u0435\u043d\u044f\u044e\u0442 \u0441\u043a\u0440\u0438\u043f\u0442\u043e\u0432\u044b\u0435 \u044f\u0437\u044b\u043a\u0438.<\/p>\n<p>  \u0412\u043e\u0442 \u0438 \u043d\u0430\u043f\u0438\u0441\u0430\u043b\u0441\u044f \u043d\u0435\u0431\u043e\u043b\u044c\u0448\u043e\u0439 \u0441\u043a\u0440\u0438\u043f\u0442 \u043d\u0430 python, \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u0443\u0442\u0430\u0441\u043a\u0438\u0432\u0430\u0435\u0442 \u0441 hh.ru \u0441\u043f\u0438\u0441\u043e\u043a \u0432\u0430\u043a\u0430\u043d\u0441\u0438\u0439 \u043f\u043e \u043d\u0435\u043a\u043e\u0442\u043e\u0440\u043e\u043c\u0443 \u0437\u0430\u043f\u0440\u043e\u0441\u0443 (\u043f\u0438\u0448\u0435\u0442\u0441\u044f \u0432 \u043f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u0443\u044e searchParam) \u0438 \u0432\u044b\u0432\u043e\u0434\u0438\u0442 \u0438\u0445 \u0432 \u0444\u043e\u0440\u043c\u0430\u0442\u0435 <a href=\"http:\/\/www.rfc-editor.org\/rfc\/rfc4180.txt\">CSV<\/a>. 
\u041c\u043e\u0436\u043d\u043e \u0432\u044b\u0432\u043e\u0434\u0438\u0442\u044c \u043d\u0430 \u044d\u043a\u0440\u0430\u043d \u0438 \u0443\u0434\u0438\u0432\u043b\u044f\u0442\u044c\u0441\u044f, \u043c\u043e\u0436\u043d\u043e \u043f\u0435\u0440\u0435\u043d\u0430\u043f\u0440\u0430\u0432\u0438\u0442\u044c \u0432 output.csv \u0438 \u043e\u0442\u043a\u0440\u044b\u0442\u044c \u0432 \u043a\u0430\u043a\u043e\u043c-\u043d\u0438\u0431\u0443\u0434\u044c \u0442\u0430\u0431\u043b\u0438\u0447\u043d\u043e\u043c \u0440\u0435\u0434\u0430\u043a\u0442\u043e\u0440\u0435.  <\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[],"tags":[],"class_list":["post-166541","post","type-post","status-publish","format-standard","hentry"],"_links":{"self":[{"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/posts\/166541","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=166541"}],"version-history":[{"count":0,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/posts\/166541\/revisions"}],"wp:attachment":[{"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=166541"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=166541"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=166541"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}
","templated":true}]}}