Selaa lähdekoodia

feat: 国家统计局最新数据(截止时间:2022-10-31,发布时间:2022-12-29)

closes #124
modood 2 vuotta sitten
vanhempi
sitoutus
17f4e13215
2 muutettua tiedostoa, joissa 8 lisäystä ja 4 poistoa
  1. 1 1
      README.md
  2. 7 3
      lib/crawler.js

+ 1 - 1
README.md

@@ -14,7 +14,7 @@
     * [中华人民共和国国家统计局-统计用区划和城乡划分代码](http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/)
     * [中华人民共和国国家统计局-统计用区划代码和城乡划分代码编制规则](http://www.stats.gov.cn/tjsj/tjbz/200911/t20091125_8667.html)
 *   本项目已更新至:
-    * [2021年统计用区划代码和城乡划分代码(截止时间:2021-10-31,发布时间:2021-12-30)](http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2021/index.html)
+    * [2022年统计用区划代码和城乡划分代码(截止时间:2022-10-31,发布时间:2022-12-29)](http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2022/index.html)
 
 ## 数据下载
 

+ 7 - 3
lib/crawler.js

@@ -1,4 +1,5 @@
 const http = require('http')
+const zlib = require('zlib')
 
 const iconv = require('iconv-lite')
 const minify = require('html-minifier').minify
@@ -19,7 +20,7 @@ const casReg = /<tr class='.*?'><td><a href=.*?>(.*?)<\/a><\/td><td><a href=.*?>
 const vReg = /<tr class='.*?'><td>(.*?)<\/td><td>.*?<\/td><td>(.*?)<\/td><\/tr>/g
 
 const host = 'www.stats.gov.cn'
-const path = '/tjsj/tjbz/tjyqhdmhcxhfdm/2021/#{route}.html'
+const path = '/tjsj/tjbz/tjyqhdmhcxhfdm/2022/#{route}.html'
 
 /**
  * 抓取数据
@@ -50,13 +51,16 @@ exports.fetch = (host, route, regexp, codeLen) =>
     res.on('data', chunk => bufferHelper.concat(chunk))
 
     res.on('end', () => {
-      const rawData = minify(iconv.decode(bufferHelper.toBuffer(), 'UTF-8'), { collapseWhitespace: true, quoteCharacter: '\'' })
+      let raw = iconv.decode(bufferHelper.toBuffer(), 'UTF-8')
+      if (!raw.includes('国家统计局')) {
+        raw = iconv.decode(zlib.gunzipSync(bufferHelper.toBuffer()), 'UTF-8')
+      }
+      const rawData = minify(raw, { collapseWhitespace: true, quoteCharacter: '\'' })
 
       const result = {}
       let current
       while ((current = regexp.exec(rawData)) !== null) result[current[1].substr(0, codeLen)] = current[2].trim()
       if (Object.keys(result).length === 0) {
-        const raw = iconv.decode(bufferHelper.toBuffer(), 'UTF-8')
         if (raw.includes('请开启JavaScript并刷新该页')) {
           console.log('\n温馨提示:请求过于频繁已被目标网站限制,当前抓取进度已保存,请五分钟后再试...\n')
           process.exit(0)