|
@@ -1,6 +1,7 @@
|
|
const crawler = require('./crawler')
|
|
const crawler = require('./crawler')
|
|
const { Province, City, Area, Street, Village } = require('./sqlite')
|
|
const { Province, City, Area, Street, Village } = require('./sqlite')
|
|
|
|
|
|
|
|
+// 每抓取 100 个页面再批量写入数据库
|
|
const limit = 100
|
|
const limit = 100
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -75,8 +76,10 @@ exports.fetchAreas = async () => {
|
|
provinceCode } } = r.results[i]
|
|
provinceCode } } = r.results[i]
|
|
index++
|
|
index++
|
|
console.log(`[${index}/${count}]正在抓取县级数据,当前地级:${cityCode} ${cityName}`)
|
|
console.log(`[${index}/${count}]正在抓取县级数据,当前地级:${cityCode} ${cityName}`)
|
|
- // 特殊处理:广东省中山市(3320)、广东省东莞市(4419)、海南省儋州市(4604)没有县级
|
|
|
|
|
|
+
|
|
|
|
+ // 特殊处理:广东省中山市(4420)、广东省东莞市(4419)、海南省儋州市(4604)没有县级。
|
|
if (['4420', '4419', '4604'].includes(cityCode)) continue
|
|
if (['4420', '4419', '4604'].includes(cityCode)) continue
|
|
|
|
+
|
|
const o = await crawler.fetchAreas(cityCode)
|
|
const o = await crawler.fetchAreas(cityCode)
|
|
for (const code in o) {
|
|
for (const code in o) {
|
|
const name = o[code]
|
|
const name = o[code]
|
|
@@ -88,6 +91,17 @@ exports.fetchAreas = async () => {
|
|
hasNext = r.cursors.hasNext
|
|
hasNext = r.cursors.hasNext
|
|
after = r.cursors.after
|
|
after = r.cursors.after
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+ // 特殊处理:广东省中山市(4420)、广东省东莞市(4419)、海南省儋州市(4604)没有县级,
|
|
|
|
+ // 需要手动插入。
|
|
|
|
+ await Area.bulkCreate([
|
|
|
|
+ { code: '441900', name: '东莞市', cityCode: '4419', provinceCode: '44' },
|
|
|
|
+ { code: '442000', name: '中山市', cityCode: '4420', provinceCode: '44' },
|
|
|
|
+ { code: '460400', name: '儋州市', cityCode: '4604', provinceCode: '46' }
|
|
|
|
+ ], { ignoreDuplicates: true })
|
|
|
|
+
|
|
|
|
+ // 特殊处理:甘肃省嘉峪关市下仅一个县级名为市辖区(code: 620201),重命名。
|
|
|
|
+ await Area.update({ name: '嘉峪关市' }, { where: { code: '620201' } })
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -113,12 +127,19 @@ exports.fetchStreets = async () => {
|
|
provinceCode } } = r.results[i]
|
|
provinceCode } } = r.results[i]
|
|
index++
|
|
index++
|
|
console.log(`[${index}/${count}]正在抓取乡级数据,当前县级:${areaCode} ${areaName}`)
|
|
console.log(`[${index}/${count}]正在抓取乡级数据,当前县级:${areaCode} ${areaName}`)
|
|
|
|
+
|
|
// 特殊处理:名为市辖区的县级没有乡级
|
|
// 特殊处理:名为市辖区的县级没有乡级
|
|
// 1. 福建省泉州市金门县(350527)也没有乡级
|
|
// 1. 福建省泉州市金门县(350527)也没有乡级
|
|
// 2. 甘肃省嘉峪关市下仅一个县级名为市辖区(code: 620201),
|
|
// 2. 甘肃省嘉峪关市下仅一个县级名为市辖区(code: 620201),
|
|
- // 但是它有乡级,因此也需要特殊处理
|
|
|
|
|
|
+ // 但是它有乡级,因此在抓取县级的时候已经重命名
|
|
if (areaName === '市辖区' || ['350527'].includes(areaCode)) continue
|
|
if (areaName === '市辖区' || ['350527'].includes(areaCode)) continue
|
|
- const o = await crawler.fetchStreets(areaCode)
|
|
|
|
|
|
+
|
|
|
|
+ // 特殊处理:广东省中山市(4420)、广东省东莞市(4419)、海南省儋州市(4604)的乡级
|
|
|
|
+ // 页面的路由比较特别,需要手动拼接。
|
|
|
|
+ let route
|
|
|
|
+ if (['4420', '4419', '4604'].includes(cityCode)) route = `${provinceCode}/${cityCode}`
|
|
|
|
+
|
|
|
|
+ const o = await crawler.fetchStreets(areaCode, route)
|
|
for (const code in o) {
|
|
for (const code in o) {
|
|
const name = o[code]
|
|
const name = o[code]
|
|
rows.push({ code, name, areaCode, cityCode, provinceCode })
|
|
rows.push({ code, name, areaCode, cityCode, provinceCode })
|
|
@@ -155,7 +176,14 @@ exports.fetchVillages = async () => {
|
|
provinceCode } } = r.results[i]
|
|
provinceCode } } = r.results[i]
|
|
index++
|
|
index++
|
|
console.log(`[${index}/${count}]正在抓取村级数据,当前乡级:${streetCode} ${streetName}`)
|
|
console.log(`[${index}/${count}]正在抓取村级数据,当前乡级:${streetCode} ${streetName}`)
|
|
- const o = await crawler.fetchVillages(streetCode)
|
|
|
|
|
|
+
|
|
|
|
+ // 特殊处理:广东省中山市(4420)、广东省东莞市(4419)、海南省儋州市(4604)的村级
|
|
|
|
+ // 页面的路由比较特别,需要手动拼接。
|
|
|
|
+ let route
|
|
|
|
+ const cCodeSuffix = cityCode.substr(2, 2)
|
|
|
|
+ if (['4420', '4419', '4604'].includes(cityCode)) route = `${provinceCode}/${cCodeSuffix}/${streetCode}`
|
|
|
|
+
|
|
|
|
+ const o = await crawler.fetchVillages(streetCode, route)
|
|
for (const code in o) {
|
|
for (const code in o) {
|
|
const name = o[code]
|
|
const name = o[code]
|
|
rows.push({ code, name, streetCode, areaCode, cityCode, provinceCode })
|
|
rows.push({ code, name, streetCode, areaCode, cityCode, provinceCode })
|