CREATE DATABASE scraping;
CREATE TABLE job_offer (
    id BIGINT(7) NOT NULL AUTO_INCREMENT,
    comp VARCHAR(200),
    title VARCHAR(200),
    URL VARCHAR(1000),
    created TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id)
);
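To confirm the table was created as intended before running the scraper, you can inspect it from the MySQL client with standard commands:

USE scraping;
DESCRIBE job_offer;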
# Script that scrapes public-institution job listings and keeps only the ICT
# (information & communications) postings.
from urllib.request import urlopen
import re
import ssl

import pymysql
from bs4 import BeautifulSoup

# Skip SSL certificate verification so urlopen() does not fail on the site's certificate.
context = ssl._create_unverified_context()

# Store the search URL in a variable (ICT postings only: detail_code=R600020,
# filtered by region, work type, career, education, and posting period).
url = "https://job.alio.go.kr/recruit.do?pageNo=1¶m=&search_yn=Y" \
"&idx=&recruitYear=&recruitMonth=&detail_code=R600020&location=R3010&work_type=R1010" \
"&work_type=R1030&career=R2020&education=R7010&education=R7040&education=R7050" \
"&education=R7060&replacement=N&s_date=2019.03.12&e_date=2019.10.12&org_name=&title=&order=REG_DATE"
# Connect directly to the scraping database; charset='utf8' so Korean titles are stored intact.
conn = pymysql.connect(host='127.0.0.1', user='root', passwd='test1234',
                       db='scraping', charset='utf8')
cur = conn.cursor()
html = urlopen(url, context=context)
bsObj = BeautifulSoup(html.read(), "html.parser")
# The job listings are rendered in a table with class "tbl type_03".
table = bsObj.find("table", class_="tbl type_03")
def extractNumber(word):
    # Pull the first run of digits out of a string (used to extract the posting
    # id from the onclick handler of each listing's link).
    return int(re.findall(r'\d+', word)[0])
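# For illustration (the onclick value below is hypothetical -- the real
# attribute on the site may differ), extractNumber pulls out the id like this:
#   extractNumber("javascript:fn_detail(123456)")  # -> 123456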
def store(comp, title, URL):
    # Parameterized query: pymysql quotes and escapes the values itself,
    # so the %s placeholders must not be wrapped in quotes.
    cur.execute(
        "INSERT INTO job_offer (comp, title, URL) VALUES (%s, %s, %s)", (comp, title, URL)
    )
    cur.connection.commit()
trs = table.tbody.findAll("tr")
for idx, tr in enumerate(trs):
    title = tr.select("td")[2].get_text().strip()  # posting title
    comp = tr.select("td")[3].get_text().strip()   # organization name
    # The detail-page id is embedded in the link's onclick attribute.
    a = extractNumber(tr.select("td")[2].find("a").attrs['onclick'])
    new_url = "https://job.alio.go.kr/recruitview.do?pageNo=1&param=&search_yn=Y&idx={0}" \
              "&recruitYear=&recruitMonth=&detail_code=R600020&location=R3010&work_type=R1010" \
              "&work_type=R1030&career=R2020&education=R7010&education=R7040" \
              "&education=R7050&education=R7060&replacement=N&s_date=2019.03.12" \
              "&e_date=2019.10.12&org_name=&title=&order=REG_DATE".format(a)
    print(idx, title, comp, new_url)
    store(comp, title, new_url)
cur.close()
conn.close()
After running the script, you can confirm that the data has been stored as follows.
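For example, a query like the one below (a minimal check; the rows returned will depend on the live listings at the time you run the scraper) lists the most recently stored offers:

SELECT id, comp, title, created FROM job_offer ORDER BY id DESC LIMIT 5;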