|
@@ -1,20 +1,22 @@
|
|
|
const crawler = require('./crawler')
|
|
|
const { Province, City, Area, Street, Village } = require('./sqlite')
|
|
|
|
|
|
+const limit = 100
|
|
|
+
|
|
|
/**
|
|
|
* 抓取所有省级数据
|
|
|
* @author https://github.com/modood
|
|
|
* @datetime 2018-01-31 22:11
|
|
|
*/
|
|
|
exports.fetchProvinces = async () => {
|
|
|
+ console.log('[1/1]正在抓取省级数据...')
|
|
|
const o = await crawler.fetchProvinces()
|
|
|
+ const rows = []
|
|
|
for (const code in o) {
|
|
|
const name = o[code]
|
|
|
- await Province.findOrCreate({
|
|
|
- where: { code },
|
|
|
- defaults: { code, name }
|
|
|
- })
|
|
|
+ rows.push({ code, name })
|
|
|
}
|
|
|
+ await Province.bulkCreate(rows, { ignoreDuplicates: true })
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -25,22 +27,26 @@ exports.fetchProvinces = async () => {
|
|
|
exports.fetchCities = async () => {
|
|
|
await exports.fetchProvinces()
|
|
|
|
|
|
+ const count = await Province.count()
|
|
|
+ let index = 0
|
|
|
let hasNext = true
|
|
|
let after
|
|
|
- const limit = 11
|
|
|
while (hasNext) {
|
|
|
const r = await Province.paginate({ limit, after })
|
|
|
+ const rows = []
|
|
|
for (let i = 0; i < r.results.length; i++) {
|
|
|
- const { dataValues: { code: provinceCode } } = r.results[i]
|
|
|
+ const { dataValues: {
|
|
|
+ name: provinceName,
|
|
|
+ code: provinceCode } } = r.results[i]
|
|
|
+ index++
|
|
|
+ console.log(`[${index}/${count}]正在抓取地级数据,当前省级:${provinceCode} ${provinceName}`)
|
|
|
const o = await crawler.fetchCities(provinceCode)
|
|
|
for (const code in o) {
|
|
|
const name = o[code]
|
|
|
- await City.findOrCreate({
|
|
|
- where: { code },
|
|
|
- defaults: { code, name, provinceCode }
|
|
|
- })
|
|
|
+ rows.push({ code, name, provinceCode })
|
|
|
}
|
|
|
}
|
|
|
+ await City.bulkCreate(rows, { ignoreDuplicates: true })
|
|
|
|
|
|
hasNext = r.cursors.hasNext
|
|
|
after = r.cursors.after
|
|
@@ -55,24 +61,29 @@ exports.fetchCities = async () => {
|
|
|
exports.fetchAreas = async () => {
|
|
|
await exports.fetchCities()
|
|
|
|
|
|
+ const count = await City.count()
|
|
|
+ let index = 0
|
|
|
let hasNext = true
|
|
|
let after
|
|
|
- const limit = 10
|
|
|
while (hasNext) {
|
|
|
const r = await City.paginate({ limit, after })
|
|
|
+ const rows = []
|
|
|
for (let i = 0; i < r.results.length; i++) {
|
|
|
- const { dataValues: { code: cityCode, provinceCode } } = r.results[i]
|
|
|
+ const { dataValues: {
|
|
|
+ name: cityName,
|
|
|
+ code: cityCode,
|
|
|
+ provinceCode } } = r.results[i]
|
|
|
+ index++
|
|
|
+ console.log(`[${index}/${count}]正在抓取县级数据,当前地级:${cityCode} ${cityName}`)
|
|
|
// 特殊处理:广东省中山市(4420)、广东省东莞市(4419)、海南省儋州市(4604)没有县级
|
|
|
if (['4420', '4419', '4604'].includes(cityCode)) continue
|
|
|
const o = await crawler.fetchAreas(cityCode)
|
|
|
for (const code in o) {
|
|
|
const name = o[code]
|
|
|
- await Area.findOrCreate({
|
|
|
- where: { code },
|
|
|
- defaults: { code, name, cityCode, provinceCode }
|
|
|
- })
|
|
|
+ rows.push({ code, name, cityCode, provinceCode })
|
|
|
}
|
|
|
}
|
|
|
+ await Area.bulkCreate(rows, { ignoreDuplicates: true })
|
|
|
|
|
|
hasNext = r.cursors.hasNext
|
|
|
after = r.cursors.after
|
|
@@ -87,17 +98,21 @@ exports.fetchAreas = async () => {
|
|
|
exports.fetchStreets = async () => {
|
|
|
await exports.fetchAreas()
|
|
|
|
|
|
+ const count = await Area.count()
|
|
|
+ let index = 0
|
|
|
let hasNext = true
|
|
|
let after
|
|
|
- const limit = 10
|
|
|
while (hasNext) {
|
|
|
const r = await Area.paginate({ limit, after })
|
|
|
+ const rows = []
|
|
|
for (let i = 0; i < r.results.length; i++) {
|
|
|
const { dataValues: {
|
|
|
name: areaName,
|
|
|
code: areaCode,
|
|
|
cityCode,
|
|
|
provinceCode } } = r.results[i]
|
|
|
+ index++
|
|
|
+ console.log(`[${index}/${count}]正在抓取乡级数据,当前县级:${areaCode} ${areaName}`)
|
|
|
// 特殊处理:名为市辖区的县级没有乡级
|
|
|
// 1. 福建省泉州市金门县(350527)也没有乡级
|
|
|
// 2. 甘肃省嘉峪关市下仅一个县级名为市辖区(code: 620201),
|
|
@@ -106,12 +121,10 @@ exports.fetchStreets = async () => {
|
|
|
const o = await crawler.fetchStreets(areaCode)
|
|
|
for (const code in o) {
|
|
|
const name = o[code]
|
|
|
- await Street.findOrCreate({
|
|
|
- where: { code },
|
|
|
- defaults: { code, name, areaCode, cityCode, provinceCode }
|
|
|
- })
|
|
|
+ rows.push({ code, name, areaCode, cityCode, provinceCode })
|
|
|
}
|
|
|
}
|
|
|
+ await Street.bulkCreate(rows, { ignoreDuplicates: true })
|
|
|
|
|
|
hasNext = r.cursors.hasNext
|
|
|
after = r.cursors.after
|
|
@@ -126,26 +139,29 @@ exports.fetchStreets = async () => {
|
|
|
exports.fetchVillages = async () => {
|
|
|
await exports.fetchStreets()
|
|
|
|
|
|
+ const count = await Street.count()
|
|
|
+ let index = 0
|
|
|
let hasNext = true
|
|
|
let after
|
|
|
- const limit = 10
|
|
|
while (hasNext) {
|
|
|
const r = await Street.paginate({ limit, after })
|
|
|
+ const rows = []
|
|
|
for (let i = 0; i < r.results.length; i++) {
|
|
|
const { dataValues: {
|
|
|
+ name: streetName,
|
|
|
code: streetCode,
|
|
|
areaCode,
|
|
|
cityCode,
|
|
|
provinceCode } } = r.results[i]
|
|
|
+ index++
|
|
|
+ console.log(`[${index}/${count}]正在抓取村级数据,当前乡级:${streetCode} ${streetName}`)
|
|
|
const o = await crawler.fetchVillages(streetCode)
|
|
|
for (const code in o) {
|
|
|
const name = o[code]
|
|
|
- await Village.findOrCreate({
|
|
|
- where: { code },
|
|
|
- defaults: { code, name, streetCode, areaCode, cityCode, provinceCode }
|
|
|
- })
|
|
|
+ rows.push({ code, name, streetCode, areaCode, cityCode, provinceCode })
|
|
|
}
|
|
|
}
|
|
|
+ await Village.bulkCreate(rows, { ignoreDuplicates: true })
|
|
|
|
|
|
hasNext = r.cursors.hasNext
|
|
|
after = r.cursors.after
|