Parsing HTML DOM
using JSDOM
Import Modules
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
const https = require('https');
Get HTML Page
// Buffer for the response body; chunks are appended as they stream in.
let data = "";

// Request the IMDb top chart page. Once the whole body has been received it is
// handed to parse_html (a hoisted function declaration later in this file).
const req = https.get('https://www.imdb.com/chart/top/', function (res) {
  // Anything other than 200 is not the chart page, so do not try to parse it.
  if (res.statusCode !== 200) {
    console.error(`Request failed with status code ${res.statusCode}`);
    // Drain the response so the socket is released.
    res.resume();
    return;
  }

  res.setEncoding('utf8');
  res.on('data', function (chunk) {
    data += chunk;
  });
  // 'end' fires after the last chunk: only now is `data` complete.
  res.on('end', function () {
    parse_html(data);
  });
});

// Without this listener a DNS/connection failure would surface as an
// unhandled 'error' event and crash the process.
req.on('error', function (err) {
  console.error(`Request failed: ${err.message}`);
});
Wait For The Data To Load
Main Parsing Function
// Every movie parsed so far. parse_html appends to this list on each call.
const movie_list = [];

/**
 * Parse an HTML page and collect one record per table row that contains a
 * `.titleColumn` cell.
 *
 * For each such row the record holds:
 *   - rank:   1-based position among the movie rows found in this call
 *   - name:   text of the title cell's first child element
 *   - year:   text of the title cell's second child element ('' if absent)
 *   - people: the first child's `title` attribute split on commas, trimmed
 *             ([] when the attribute is missing)
 *   - rating: text of the first child of the row's `.imdbRating` cell
 *             (key omitted when the row has no such cell)
 *
 * NOTE(review): assumes the title cell's first child is the movie link and the
 * second is the year element; confirm against the page's current markup.
 *
 * Records are appended to the module-level `movie_list`, and every entry of
 * `movie_list` is then logged to the console.
 *
 * @param {*} data - HTML source; coerced to a string before parsing.
 * @returns {Array<Object>} The module-level `movie_list`.
 */
function parse_html(data) {
  const document = new JSDOM(String(data)).window.document;
  const rows = Array.from(document.getElementsByTagName('tr'));
  let rank = 0;

  for (const row of rows) {
    const cells = Array.from(row.cells);
    const title_cell = cells.find((cell) => cell.matches('.titleColumn'));
    // Header rows and unrelated table rows have no title cell; skipping them
    // keeps empty records out of movie_list and keeps ranks contiguous.
    if (!title_cell) {
      continue;
    }

    const title_node = title_cell.children[0];
    if (!title_node) {
      continue;
    }
    const year_node = title_cell.children[1];
    // getAttribute returns null when the attribute is absent.
    const people_attr = title_node.getAttribute('title');

    rank += 1;
    const movie_info = {
      rank,
      name: title_node.textContent,
      year: year_node ? year_node.textContent : '',
      people: people_attr
        ? people_attr.split(',').map((person) => person.trim())
        : [],
    };

    const rating_cell = cells.find((cell) => cell.matches('.imdbRating'));
    if (rating_cell && rating_cell.children[0]) {
      movie_info.rating = rating_cell.children[0].textContent;
    }

    movie_list.push(movie_info);
  }

  for (const movie of movie_list) {
    console.log(movie);
  }

  return movie_list;
}
Last updated