웹 크롤링

[ 웹 크롤링 ] 지니 뮤직 순위 크롤링하기

dauneee 2022. 5. 24. 15:38

1. 음악 정보를 담을 테이블을 생성한다.

 

-- Table that stores the scraped tracks: one row per title/artist pair.
create table genie(
	mid int,              -- numeric key; the loader inserts max(mid) + 1
	title varchar(200),
	artist varchar(100),
	constraint genie_pk primary key (mid)  -- named so errors and migrations are greppable
);

-- Check the loaded rows. Columns are listed explicitly and the result is
-- ordered by the key so the output stays stable across runs and schema changes.
select
	mid,
	title,
	artist
from genie
order by mid;

 

 

2. 지니 뮤직 페이지를 크롤링해 얻은 곡 제목과 가수 정보를 genie 테이블에 삽입한다.

 

import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Iterator;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Value object holding one track: a numeric id, a title and an artist.
 * All three properties are plain read/write bean properties with no validation.
 */
class MusicVO{
	private int mid;
	private String title;
	private String artist;

	// ---- getters ----

	/** @return the numeric id; 0 until {@link #setMid(int)} is called */
	public int getMid() { return this.mid; }

	/** @return the track title, or null if none has been set */
	public String getTitle() { return this.title; }

	/** @return the artist name, or null if none has been set */
	public String getArtist() { return this.artist; }

	// ---- setters ----

	/** @param mid the numeric id to store */
	public void setMid(final int mid) { this.mid = mid; }

	/** @param title the track title to store */
	public void setTitle(final String title) { this.title = title; }

	/** @param artist the artist name to store */
	public void setArtist(final String artist) { this.artist = artist; }
}

/**
 * Fetches the Genie Music front page, extracts title/artist text from the
 * chart links and inserts one row per track into the {@code genie} table.
 */
public class Test02 {
	/** Page that is downloaded and parsed. */
	private static final String PAGE_URL="https://www.genie.co.kr/";
	/** Selects every anchor inside the info cell of each list row. */
	private static final String LINK_SELECTOR="table.list-wrap > tbody > tr.list > td.info > a";
	/** Anchors consumed per track: the first is skipped, then title, then artist. */
	private static final int LINKS_PER_TRACK=3;

	// Connection settings.
	// NOTE(review): credentials are hard-coded and use the "system" account;
	// consider a dedicated schema user and external configuration.
	private static final String DRIVER_NAME="oracle.jdbc.driver.OracleDriver";
	private static final String DB_URL="jdbc:oracle:thin:@localhost:1521:xe";
	private static final String DB_USER="system";
	private static final String DB_PASSWORD="oracle";

	// Explicit column list so the statement keeps working if columns are added
	// to the table or reordered.
	// NOTE(review): max(mid)+1 is not safe when several writers insert at the
	// same time; a sequence or identity column would avoid duplicate keys.
	private static final String INSERT_SQL=
			"insert into genie (mid, title, artist) values ((select nvl(max(mid),0)+1 from genie),?,?)";

	/**
	 * Entry point: scrape the page, then store whatever was parsed.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		ArrayList<MusicVO> datas=fetchTracks();
		if(datas.isEmpty()) {
			// Nothing was parsed (download failed or no link matched), so do not
			// open a database connection at all.
			System.err.println("No tracks were parsed; nothing to insert.");
			return;
		}
		saveTracks(datas);
	}

	/**
	 * Downloads the page and converts the matched links into value objects.
	 *
	 * @return the parsed tracks; empty when the download fails or no complete
	 *         group of links is found
	 */
	private static ArrayList<MusicVO> fetchTracks() {
		ArrayList<MusicVO> datas=new ArrayList<MusicVO>();

		Document doc;
		try {
			doc=Jsoup.connect(PAGE_URL).get();
		} catch (IOException e) {
			e.printStackTrace();
			// Without a document there is nothing to select from; returning here
			// avoids dereferencing a null document.
			return datas;
		}

		Elements eles=doc.select(LINK_SELECTOR);

		// Walk the links in groups of three. Only complete groups are used, so a
		// trailing partial group is ignored instead of reading past the end.
		for(int i=0; i+LINKS_PER_TRACK<=eles.size(); i+=LINKS_PER_TRACK) {
			MusicVO vo=new MusicVO();
			vo.setTitle(eles.get(i+1).text());
			vo.setArtist(eles.get(i+2).text());
			datas.add(vo);
		}
		return datas;
	}

	/**
	 * Inserts every track with one prepared statement. Failures are printed to
	 * standard error; rows inserted before a failure are kept.
	 *
	 * @param datas tracks to insert
	 */
	private static void saveTracks(ArrayList<MusicVO> datas) {
		try {
			Class.forName(DRIVER_NAME);
		} catch (ClassNotFoundException e) {
			e.printStackTrace();
			return;
		}

		// try-with-resources closes the statement and then the connection, even
		// when opening either of them or executing an insert fails.
		try (Connection conn=DriverManager.getConnection(DB_URL,DB_USER,DB_PASSWORD);
				PreparedStatement pstmt=conn.prepareStatement(INSERT_SQL)) {
			for(MusicVO v:datas) {
				pstmt.setString(1, v.getTitle());
				pstmt.setString(2, v.getArtist());
				pstmt.executeUpdate();
			}
		} catch (SQLException e) {
			e.printStackTrace();
		}
	}
}