|
@@ -4,12 +4,13 @@ const iconv = require('iconv-lite')
|
|
|
const BufferHelper = require('bufferhelper')
|
|
|
|
|
|
/*
|
|
|
- * 备注:命名简写描述
|
|
|
- * 省份(Province) p
|
|
|
- * 城市(City) c
|
|
|
- * 区县(Area) a
|
|
|
- * 乡镇街道(Street) s
|
|
|
- * 村(居)委会(Village) v
|
|
|
+ * 命名简写备注
|
|
|
+ *
|
|
|
+ * 省级(省份,Province) p
|
|
|
+ * 地级(城市,City) c
|
|
|
+ * 县级(区县,Area) a
|
|
|
+ * 乡级(乡镇街道,Street) s
|
|
|
+ * 村级(村委会居委会,Village) v
|
|
|
*/
|
|
|
|
|
|
const host = 'www.stats.gov.cn'
|
|
@@ -41,8 +42,8 @@ const links = [
|
|
|
* @author modood <https://github.com/modood>
|
|
|
* @datetime 2018-01-31 19:23
|
|
|
*/
|
|
|
-exports.fetch = (host, path, regexp) =>
|
|
|
- new Promise((resolve, reject) => http.get({ host, path, timeout: 1000 }, res => {
|
|
|
+exports.fetch = (host, path, regexp, codeLen) =>
|
|
|
+ new Promise((resolve, reject) => http.get({ host, path, timeout: 3000 }, res => {
|
|
|
const bufferHelper = new BufferHelper()
|
|
|
const statusCode = res.statusCode
|
|
|
|
|
@@ -58,28 +59,28 @@ exports.fetch = (host, path, regexp) =>
|
|
|
|
|
|
const result = {}
|
|
|
let current
|
|
|
- while ((current = regexp.exec(rawData)) !== null) result[current[1]] = current[2].trim()
|
|
|
+ while ((current = regexp.exec(rawData)) !== null) result[current[1].substr(0, codeLen)] = current[2].trim()
|
|
|
|
|
|
return resolve(result)
|
|
|
})
|
|
|
}).on('error', reject).on('timeout', () => reject(new Error('timeout'))))
|
|
|
|
|
|
/**
|
|
|
- * 抓取省份数据
|
|
|
+ * 抓取省级数据
|
|
|
* @author modood <https://github.com/modood>
|
|
|
* @datetime 2018-01-31 19:40
|
|
|
*/
|
|
|
exports.fetchProvinces = async () => {
|
|
|
try {
|
|
|
- return await exports.fetch(host, links[0].path, links[0].regexp)
|
|
|
+ return await exports.fetch(host, links[0].path, links[0].regexp, 2)
|
|
|
} catch (err) {
|
|
|
- console.error(err)
|
|
|
- process.exit(-1)
|
|
|
+ console.log(`抓取省级数据失败(${err}),正在重试...`)
|
|
|
+ return exports.fetchProvinces()
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * 抓取城市数据
|
|
|
+ * 抓取地级数据
|
|
|
* @author modood <https://github.com/modood>
|
|
|
* @datetime 2018-01-31 19:51
|
|
|
*/
|
|
@@ -87,15 +88,15 @@ exports.fetchCities = async (pCode) => {
|
|
|
const path = links[1].path.replace('#{pCode}', pCode)
|
|
|
|
|
|
try {
|
|
|
- return await exports.fetch(host, path, links[1].regexp)
|
|
|
+ return await exports.fetch(host, path, links[1].regexp, 4)
|
|
|
} catch (err) {
|
|
|
- console.log(`抓取省份(${pCode})的城市数据失败(${err}),正在重试...`)
|
|
|
+ console.log(`抓取省级(${pCode})的地级数据失败(${err}),正在重试...`)
|
|
|
return this.fetchCities(pCode)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * 抓取区县数据
|
|
|
+ * 抓取县级数据
|
|
|
* @author modood <https://github.com/modood>
|
|
|
* @datetime 2018-01-31 20:03
|
|
|
*/
|
|
@@ -107,15 +108,16 @@ exports.fetchAreas = async (cCode) => {
|
|
|
.replace('#{cCode}', cCode)
|
|
|
|
|
|
try {
|
|
|
- return await exports.fetch(host, path, links[2].regexp)
|
|
|
+ return await exports.fetch(host, path, links[2].regexp, 6)
|
|
|
} catch (err) {
|
|
|
- console.log(`抓取城市(${cCode})的区县数据失败(${err}),正在重试...`)
|
|
|
+ console.log(`抓取地级(${cCode})的县级数据失败(${err}),正在重试...`)
|
|
|
+ console.log(path)
|
|
|
return this.fetchAreas(cCode)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * 抓取街道数据
|
|
|
+ * 抓取乡级数据
|
|
|
* @author modood <https://github.com/modood>
|
|
|
* @datetime 2018-01-31 20:08
|
|
|
*/
|
|
@@ -129,15 +131,16 @@ exports.fetchStreets = async (aCode) => {
|
|
|
.replace('#{aCode}', aCode)
|
|
|
|
|
|
try {
|
|
|
- return await exports.fetch(host, path, links[3].regexp)
|
|
|
+ return await exports.fetch(host, path, links[3].regexp, 9)
|
|
|
} catch (err) {
|
|
|
- console.log(`抓取区县(${aCode})的乡镇街道数据失败(${err}),正在重试...`)
|
|
|
+ console.log(`抓取县级(${aCode})的乡级数据失败(${err}),正在重试...`)
|
|
|
+ console.log(path)
|
|
|
return this.fetchStreets(aCode)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * 抓取村(居)委会数据
|
|
|
+ * 抓取村级数据
|
|
|
* @author modood <https://github.com/modood>
|
|
|
* @datetime 2018-01-31 20:19
|
|
|
*/
|
|
@@ -153,9 +156,9 @@ exports.fetchVillages = async (sCode) => {
|
|
|
.replace('#{sCode}', sCode)
|
|
|
|
|
|
try {
|
|
|
- return await exports.fetch(host, path, links[4].regexp)
|
|
|
+ return await exports.fetch(host, path, links[4].regexp, 12)
|
|
|
} catch (err) {
|
|
|
- console.log(`抓取乡镇街道(${sCode})的村(居)委会数据失败(${err}),正在重试...`)
|
|
|
+ console.log(`抓取乡级(${sCode})的村级数据失败(${err}),正在重试...`)
|
|
|
return this.fetchVillages(sCode)
|
|
|
}
|
|
|
}
|