module.exports = {
options: {
hostname:
'www.ozon.ru',
port:
443,
path:
'/context/detail/id/144054492/',
method:
'GET',
headers: {
'accept':
'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'if-modified-since':
'Fri, 08 Jun 2018 03:42:08 GMT',
'referer':
'https://www.ozon.ru/catalog/1133763/?type=48856',
'upgrade-insecure-requests':
1,
'user-agent':
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'
}
},
baseURL:
'https://www.ozon.ru',
timeout:
3000
}
var fs =
require(
'fs')
var datas =
require(
'../tmp/mid_output.json')
// Registry of extra CSV columns discovered in the items' params
// (column name -> true).
// It is created WITHOUT a prototype: with a plain object, a param whose name
// matches an inherited Object member ("constructor", "toString", ...) would
// never look `undefined` and therefore would never be registered as a column.
const culs = Object.create(null);
/**
 * Turns a value into text that is safe to place in one field of a
 * comma-separated row.
 *
 * - Line breaks are replaced with ';' so a field can never split a row.
 * - A field containing a comma or a double quote is wrapped in double quotes,
 *   with embedded quotes doubled (RFC 4180 style), so it cannot shift columns.
 *
 * @param {*} str - Field value; non-strings are converted with String().
 * @returns {string|null} The escaped field, or null when the value is missing
 *   (undefined or null). Callers concatenate the result into a row, where a
 *   missing value therefore shows up as the text "null".
 */
const prepare = str => {
  // Missing value: keep `return null` on ONE line — a line break right after
  // `return` makes the function return undefined instead.
  if (str === undefined || str === null) {
    return null;
  }

  // Flatten real line breaks only (not the letter "n").
  const flat = String(str).replace(/\r\n|\r|\n/g, ';');

  // Quote only when needed so simple values stay unchanged.
  if (/[",]/.test(flat)) {
    return '"' + flat.replace(/"/g, '""') + '"';
  }
  return flat;
};
// Remove every item that has no usable `params` list; the flattening step
// that follows iterates `params` and would throw on such items.
// The array is walked from the end so that splice() never shifts an element
// that has not been examined yet (a forward walk skips the neighbour of every
// removed element).
for (let i = datas.length - 1; i >= 0; i--) {
  if (!Array.isArray(datas[i].params)) {
    datas.splice(i, 1);
  }
}
// Flatten each item: copy every { key, value } pair from `params` onto the
// item itself, remember each newly seen key in `culs`, then drop `params`.
for (let n = 0; n < datas.length; n += 1) {
  const entry = datas[n];
  for (const { key, value } of entry.params) {
    const isUnknownColumn = culs[key] === undefined;
    if (isUnknownColumn) {
      culs[key] = true;
    }
    entry[key] = value;
  }
  delete entry.params;
}
// Build the CSV header: the fixed columns followed by one column per key
// collected in `culs`, terminated by a real newline so the first data row
// starts on its own line.
let columnsName = 'number,href,img,name,price,cnum';
for (const key in culs) {
  columnsName += ',' + prepare(key);
}
columnsName += '\n';

// writeFileSync is synchronous and takes no callback: a failure is thrown,
// not passed to a function, so no error callback is supplied here.
// NOTE(review): flag 'a' appends, so running the script again adds another
// header to an existing output.csv — confirm that this is intended.
fs.writeFileSync(__dirname + '/../output/output.csv', columnsName, { flag: 'a' });
// Write one CSV row per item: a running row number, the fixed fields, then
// one field per extra column registered in `culs` (same order as the header).
let cnt = 1;
let csvRows = '';
for (const data of datas) {
  let str = String(cnt++);
  str += ',' + prepare(data.href);
  str += ',' + prepare(data.img);
  str += ',' + prepare(data.name);
  str += ',' + prepare(data.price);
  str += ',' + prepare(data.cnum);
  for (const key in culs) {
    // Items lacking this column yield null from prepare(), which the string
    // concatenation renders as the text "null".
    str += ',' + prepare(data[key]);
  }
  // Terminate the row with a real newline.
  csvRows += str + '\n';
}

// All rows are appended with a single synchronous write instead of reopening
// the file for every row. writeFileSync takes no callback; failures are thrown.
fs.writeFileSync(__dirname + '/../output/output.csv', csvRows, { flag: 'a' });

// cnt has already been advanced past the last row, so this prints
// (rows written + 1).
console.log(cnt);
const https =
require(
'https')
const fs =
require(
'fs')
const iconv =
require(
'iconv-lite')
const jsdom =
require(
'jsdom')
const { JSDOM } = jsdom
var config =
require(
'../config')
// Shared list of scraped product records; filled by getInput(), enriched by
// getDetail() and serialized by setOutput().
const items = [];
/**
 * Parses the saved listing page at ../input/input.txt and appends one record
 * per product tile (elements with classes "bOneTile inline") to `items`.
 *
 * Each record has the shape { href, img, name, price, cnum }:
 *  - href:  `href` of the tile's first "eOneTile_link" element
 *  - img:   "data-image-src" of the first "eOneTile_image_link" element
 *  - name:  the tile's "data-name" attribute
 *  - price: the tile's "data-price" attribute, or the text 'null' if absent
 *  - cnum:  innerHTML of the first "eOneTile_ReviewsCount" element, or '0'
 *
 * Throws if the file cannot be read or a tile lacks its link / image element.
 */
const getInput = () => {
  const html = fs.readFileSync(__dirname + '/../input/input.txt');
  const dom = new JSDOM(html.toString());
  const tiles = dom.window.document.getElementsByClassName('bOneTile inline');

  for (const tile of tiles) {
    const link = tile.getElementsByClassName('eOneTile_link')[0];
    const imageLink = tile.getElementsByClassName('eOneTile_image_link')[0];
    const reviews = tile.getElementsByClassName('eOneTile_ReviewsCount')[0];

    // getAttribute() yields null (not undefined) for a missing attribute, so
    // both are checked before falling back to the 'null' placeholder.
    const rawPrice = tile.getAttribute('data-price');
    const price = (rawPrice === null || rawPrice === undefined) ? 'null' : rawPrice;

    items.push({
      href: link.href,
      img: imageLink.getAttribute('data-image-src'),
      name: tile.getAttribute('data-name'),
      price: price,
      cnum: reviews === undefined ? '0' : reviews.innerHTML
    });
  }
};
/**
 * Downloads the detail page of items[idx] and stores its property table on
 * the item as `params`: an array of { key, value } pairs read from the
 * elements with class "eItemProperties_line".
 *
 * The function returns immediately; `params` is only set once the response
 * has ended. If the request fails, the error is logged and `params` is left
 * unset for that item.
 *
 * Side effects: overwrites config.options.path with the item's href and,
 * after the request has been issued, sets config.options.headers.referer to
 * this item's URL (so it is in place for the next call).
 *
 * @param {number} idx - Index into `items`.
 */
const getDetail = idx => {
  const item = items[idx];
  config.options.path = item.href;

  const req = https.get(config.options, res => {
    const chunks = [];
    res.on('data', chunk => {
      chunks.push(chunk);
    });
    res.on('end', () => {
      // The body is decoded as windows-1251 before parsing.
      const html = iconv.decode(Buffer.concat(chunks), 'win1251');
      const dom = new JSDOM(html.toString());
      const lines = dom.window.document.getElementsByClassName('eItemProperties_line');

      item.params = [];
      for (const line of lines) {
        // Child nodes 1 and 3 hold the property name and value. A row with
        // unexpected markup is skipped instead of throwing inside this event
        // handler (which would abort the whole run).
        const keyNode = line.childNodes[1];
        const valueNode = line.childNodes[3];
        if (keyNode === undefined || valueNode === undefined) {
          continue;
        }
        item.params.push({
          key: keyNode.innerHTML,
          value: valueNode.innerHTML
        });
      }
    });
  });

  // Without an 'error' listener a failed request (DNS, reset, TLS, ...) is
  // thrown as an unhandled 'error' event and terminates the process.
  req.on('error', err => {
    console.error('getDetail: request for ' + item.href + ' failed:', err);
  });
  req.end();

  config.options.headers.referer = config.baseURL + item.href;
};
/**
 * Builds a zero-argument step function for the index range [idx, end).
 *
 * Calling the returned function either
 *  - logs idx, starts getDetail(idx) and schedules the step for idx + 1 to
 *    run after config.timeout, or
 *  - calls setOutput() when idx is not below end.
 *
 * @param {number} idx - Index of the item to fetch in this step.
 * @param {number} end - Exclusive upper bound of the range.
 * @returns {Function} The step function.
 */
const getAllDetail = (idx, end) => {
  const step = () => {
    if (!(idx < end)) {
      setOutput();
      return;
    }
    console.log(idx);
    getDetail(idx);
    const nextStep = getAllDetail(idx + 1, end);
    setTimeout(nextStep, config.timeout);
  };
  return step;
};
/**
 * Serializes the current `items` array as JSON into ../tmp/mid_output.json.
 * The write is asynchronous; a failure is reported on stderr. Nothing is
 * logged on success (the callback receives null in that case).
 */
const setOutput = () => {
  fs.writeFile(
    __dirname + '/../tmp/mid_output.json',
    JSON.stringify(items),
    err => {
      if (err) {
        console.error('setOutput: failed to write mid_output.json:', err);
      }
    }
  );
};
// Entry point: load the product list, save it once before any detail page
// has been fetched, then walk all items; the final step saves again.
getInput();
setOutput();
const runDetailSteps = getAllDetail(0, items.length);
runDetailSteps();