|
@@ -13,37 +13,24 @@ const BufferHelper = require('bufferhelper')
|
|
|
* 村级(村委会居委会,Village) v
|
|
|
*/
|
|
|
|
|
|
+const pReg = /<td><a href='(.*?).html'>(.*?)<br\/><\/a><\/td>/g
|
|
|
+const casReg = /<tr class='.*?'><td><a href=.*?>(.*?)<\/a><\/td><td><a href=.*?>(.*?)<\/a><\/td><\/tr>/g
|
|
|
+const vReg = /<td>(.*?)<\/td><td>.*?<\/td><td>(.*?)<\/td>/g
|
|
|
+
|
|
|
const host = 'www.stats.gov.cn'
|
|
|
-const links = [
|
|
|
- {
|
|
|
- path: '/tjsj/tjbz/tjyqhdmhcxhfdm/2016/index.html',
|
|
|
- regexp: /<td><a href='(.*?).html'>(.*?)<br\/><\/a><\/td>/g
|
|
|
- },
|
|
|
- {
|
|
|
- path: '/tjsj/tjbz/tjyqhdmhcxhfdm/2016/#{pCode}.html',
|
|
|
- regexp: /<tr class='.*?'><td><a href=.*?>(.*?)<\/a><\/td><td><a href=.*?>(.*?)<\/a><\/td><\/tr>/g
|
|
|
- },
|
|
|
- {
|
|
|
- path: '/tjsj/tjbz/tjyqhdmhcxhfdm/2016/#{pCode}/#{cCode}.html',
|
|
|
- regexp: /<tr class='.*?'><td><a href=.*?>(.*?)<\/a><\/td><td><a href=.*?>(.*?)<\/a><\/td><\/tr>/g
|
|
|
- },
|
|
|
- {
|
|
|
- path: '/tjsj/tjbz/tjyqhdmhcxhfdm/2016/#{pCode}/#{cCodeSuffix}/#{aCode}.html',
|
|
|
- regexp: /<tr class='.*?'><td><a href=.*?>(.*?)<\/a><\/td><td><a href=.*?>(.*?)<\/a><\/td><\/tr>/g
|
|
|
- },
|
|
|
- {
|
|
|
- path: '/tjsj/tjbz/tjyqhdmhcxhfdm/2016/#{pCode}/#{cCodeSuffix}/#{aCodeSuffix}/#{sCode}.html',
|
|
|
- regexp: /<td>(.*?)<\/td><td>.*?<\/td><td>(.*?)<\/td>/g
|
|
|
- }
|
|
|
-]
|
|
|
+const path = '/tjsj/tjbz/tjyqhdmhcxhfdm/2016/#{route}.html'
|
|
|
|
|
|
/**
|
|
|
* 抓取数据
|
|
|
* @author modood <https://github.com/modood>
|
|
|
* @datetime 2018-01-31 19:23
|
|
|
*/
|
|
|
-exports.fetch = (host, path, regexp, codeLen) =>
|
|
|
- new Promise((resolve, reject) => http.get({ host, path, timeout: 3000 }, res => {
|
|
|
+exports.fetch = (host, route, regexp, codeLen) =>
|
|
|
+ new Promise((resolve, reject) => http.get({
|
|
|
+ host,
|
|
|
+ path: path.replace('#{route}', route),
|
|
|
+ timeout: 3000
|
|
|
+ }, res => {
|
|
|
const bufferHelper = new BufferHelper()
|
|
|
const statusCode = res.statusCode
|
|
|
|
|
@@ -72,10 +59,10 @@ exports.fetch = (host, path, regexp, codeLen) =>
|
|
|
*/
|
|
|
exports.fetchProvinces = async () => {
|
|
|
try {
|
|
|
- return await exports.fetch(host, links[0].path, links[0].regexp, 2)
|
|
|
+ return await exports.fetch(host, 'index', pReg, 2)
|
|
|
} catch (err) {
|
|
|
console.log(`抓取省级数据失败(${err}),正在重试...`)
|
|
|
- return this.fetchProvinces()
|
|
|
+ return exports.fetchProvinces()
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -85,13 +72,11 @@ exports.fetchProvinces = async () => {
|
|
|
* @datetime 2018-01-31 19:51
|
|
|
*/
|
|
|
exports.fetchCities = async (pCode) => {
|
|
|
- const path = links[1].path.replace('#{pCode}', pCode)
|
|
|
-
|
|
|
try {
|
|
|
- return await exports.fetch(host, path, links[1].regexp, 4)
|
|
|
+ return await exports.fetch(host, pCode, casReg, 4)
|
|
|
} catch (err) {
|
|
|
console.log(`抓取省级(${pCode})的地级数据失败(${err}),正在重试...`)
|
|
|
- return this.fetchCities(pCode)
|
|
|
+ return exports.fetchCities(pCode)
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -103,16 +88,12 @@ exports.fetchCities = async (pCode) => {
|
|
|
exports.fetchAreas = async (cCode) => {
|
|
|
cCode = cCode.toString()
|
|
|
const pCode = cCode.substr(0, 2)
|
|
|
- const path = links[2].path
|
|
|
- .replace('#{pCode}', pCode)
|
|
|
- .replace('#{cCode}', cCode)
|
|
|
|
|
|
try {
|
|
|
- return await exports.fetch(host, path, links[2].regexp, 6)
|
|
|
+ return await exports.fetch(host, `${pCode}/${cCode}`, casReg, 6)
|
|
|
} catch (err) {
|
|
|
console.log(`抓取地级(${cCode})的县级数据失败(${err}),正在重试...`)
|
|
|
- console.log(path)
|
|
|
- return this.fetchAreas(cCode)
|
|
|
+ return exports.fetchAreas(cCode)
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -121,21 +102,17 @@ exports.fetchAreas = async (cCode) => {
|
|
|
* @author modood <https://github.com/modood>
|
|
|
* @datetime 2018-01-31 20:08
|
|
|
*/
|
|
|
-exports.fetchStreets = async (aCode) => {
|
|
|
+exports.fetchStreets = async (aCode, route) => {
|
|
|
aCode = aCode.toString()
|
|
|
const pCode = aCode.substr(0, 2)
|
|
|
const cCodeSuffix = aCode.substr(2, 2)
|
|
|
- const path = links[3].path
|
|
|
- .replace('#{pCode}', pCode)
|
|
|
- .replace('#{cCodeSuffix}', cCodeSuffix)
|
|
|
- .replace('#{aCode}', aCode)
|
|
|
+ const _route = route || `${pCode}/${cCodeSuffix}/${aCode}`
|
|
|
|
|
|
try {
|
|
|
- return await exports.fetch(host, path, links[3].regexp, 9)
|
|
|
+ return await exports.fetch(host, _route, casReg, 9)
|
|
|
} catch (err) {
|
|
|
console.log(`抓取县级(${aCode})的乡级数据失败(${err}),正在重试...`)
|
|
|
- console.log(path)
|
|
|
- return this.fetchStreets(aCode)
|
|
|
+ return exports.fetchStreets(aCode, route)
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -144,21 +121,17 @@ exports.fetchStreets = async (aCode) => {
|
|
|
* @author modood <https://github.com/modood>
|
|
|
* @datetime 2018-01-31 20:19
|
|
|
*/
|
|
|
-exports.fetchVillages = async (sCode) => {
|
|
|
+exports.fetchVillages = async (sCode, route) => {
|
|
|
sCode = sCode.toString()
|
|
|
const pCode = sCode.substr(0, 2)
|
|
|
const cCodeSuffix = sCode.substr(2, 2)
|
|
|
const aCodeSuffix = sCode.substr(4, 2)
|
|
|
- const path = links[4].path
|
|
|
- .replace('#{pCode}', pCode)
|
|
|
- .replace('#{cCodeSuffix}', cCodeSuffix)
|
|
|
- .replace('#{aCodeSuffix}', aCodeSuffix)
|
|
|
- .replace('#{sCode}', sCode)
|
|
|
+ const _route = route || `${pCode}/${cCodeSuffix}/${aCodeSuffix}/${sCode}`
|
|
|
|
|
|
try {
|
|
|
- return await exports.fetch(host, path, links[4].regexp, 12)
|
|
|
+ return await exports.fetch(host, _route, vReg, 12)
|
|
|
} catch (err) {
|
|
|
console.log(`抓取乡级(${sCode})的村级数据失败(${err}),正在重试...`)
|
|
|
- return this.fetchVillages(sCode)
|
|
|
+ return exports.fetchVillages(sCode, route)
|
|
|
}
|
|
|
}
|