웹 크롤링
[ 웹 크롤링 ] 지니 뮤직 순위 크롤링 하기
dauneee
2022. 5. 24. 15:38
1. 음악 정보를 담을 테이블을 생성한다.
-- Storage for crawled songs: a numeric primary key plus title and artist text.
CREATE TABLE genie (
    mid    INT PRIMARY KEY,
    title  VARCHAR(200),
    artist VARCHAR(100)
);

-- Inspect the rows currently stored in the table.
SELECT * FROM genie;
2. 지니 뮤직 페이지를 크롤링한 뒤, 수집한 데이터를 테이블에 삽입한다.
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Iterator;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Plain value object for one song entry: a numeric id, a title and an artist.
 * All fields are mutable and start at their Java defaults (0 / null).
 */
class MusicVO {

    private int mid;
    private String title;
    private String artist;

    // ---- accessors ----

    /** @return the numeric id of this entry */
    public int getMid() {
        return this.mid;
    }

    /** @return the song title, or {@code null} if none was set */
    public String getTitle() {
        return this.title;
    }

    /** @return the artist name, or {@code null} if none was set */
    public String getArtist() {
        return this.artist;
    }

    // ---- mutators ----

    /** @param mid the numeric id to store */
    public void setMid(int mid) {
        this.mid = mid;
    }

    /** @param title the song title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @param artist the artist name to store */
    public void setArtist(String artist) {
        this.artist = artist;
    }
}
/**
 * Crawls the song links on the Genie front page and inserts each
 * title/artist pair into the {@code genie} table.
 */
public class Test02 {

    /** Page that is fetched and parsed. */
    private static final String PAGE_URL = "https://www.genie.co.kr/";

    /** CSS selector for the anchors inside the info cell of each list row. */
    private static final String LINK_SELECTOR = "table.list-wrap > tbody > tr.list > td.info > a";

    /**
     * Number of consecutive anchors consumed per song: the first is skipped,
     * the second supplies the title and the third supplies the artist.
     */
    private static final int LINKS_PER_SONG = 3;

    private static final String DRIVER_NAME = "oracle.jdbc.driver.OracleDriver";
    private static final String DB_URL = "jdbc:oracle:thin:@localhost:1521:xe";
    // NOTE(review): credentials are hard-coded for a local demo database;
    // move them to configuration before using this anywhere else.
    private static final String DB_USER = "system";
    private static final String DB_PASSWORD = "oracle";

    /** Insert statement; {@code mid} is computed as the current maximum plus one (1 for an empty table). */
    private static final String INSERT_SQL =
            "insert into genie values((select nvl(max(mid),0)+1 from genie),?,?)";

    /**
     * Entry point: fetch the page, extract the songs, store them.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Document doc;
        try {
            doc = Jsoup.connect(PAGE_URL).get();
        } catch (IOException e) {
            e.printStackTrace();
            // Without a document there is nothing to parse; stop instead of
            // dereferencing a null document below.
            return;
        }

        ArrayList<MusicVO> datas = parseSongs(doc.select(LINK_SELECTOR));
        if (datas.isEmpty()) {
            System.err.println("No songs matched selector: " + LINK_SELECTOR);
            return;
        }
        saveSongs(datas);
    }

    /**
     * Converts the selected anchors into song objects, three anchors per song.
     * A trailing group with fewer than three anchors is ignored rather than
     * raising {@code NoSuchElementException}.
     *
     * @param links anchors in document order
     * @return the extracted songs, possibly empty
     */
    private static ArrayList<MusicVO> parseSongs(Elements links) {
        ArrayList<MusicVO> songs = new ArrayList<MusicVO>();
        for (int i = 0; i + LINKS_PER_SONG <= links.size(); i += LINKS_PER_SONG) {
            // links.get(i) is intentionally skipped, as in the original traversal.
            Element titleLink = links.get(i + 1);
            Element artistLink = links.get(i + 2);

            MusicVO vo = new MusicVO();
            vo.setTitle(titleLink.text());
            vo.setArtist(artistLink.text());
            songs.add(vo);
        }
        return songs;
    }

    /**
     * Inserts every song into the {@code genie} table. Failures are reported
     * on stderr via {@code printStackTrace}.
     *
     * @param songs songs to insert
     */
    private static void saveSongs(ArrayList<MusicVO> songs) {
        try {
            Class.forName(DRIVER_NAME);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
            return;
        }

        // try-with-resources closes the statement and then the connection,
        // even when opening one of them or an insert fails.
        try (Connection conn = DriverManager.getConnection(DB_URL, DB_USER, DB_PASSWORD);
                PreparedStatement pstmt = conn.prepareStatement(INSERT_SQL)) {
            for (MusicVO song : songs) {
                pstmt.setString(1, song.getTitle());
                pstmt.setString(2, song.getArtist());
                pstmt.executeUpdate();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
}