Эх сурвалжийг харах

refactor: 批量写入数据库

modood 7 жил өмнө
parent
commit
b076c1d66f
1 өөрчлөгдсөн 42 нэмэгдсэн , 26 устгасан
  1. 42 26
      lib/worker.js

+ 42 - 26
lib/worker.js

@@ -1,20 +1,22 @@
 const crawler = require('./crawler')
 const { Province, City, Area, Street, Village } = require('./sqlite')
 
+const limit = 100
+
 /**
  * 抓取所有省级数据
  * @author   https://github.com/modood
  * @datetime 2018-01-31 22:11
  */
 exports.fetchProvinces = async () => {
+  console.log('[1/1]正在抓取省级数据...')
   const o = await crawler.fetchProvinces()
+  const rows = []
   for (const code in o) {
     const name = o[code]
-    await Province.findOrCreate({
-      where: { code },
-      defaults: { code, name }
-    })
+    rows.push({ code, name })
   }
+  await Province.bulkCreate(rows, { ignoreDuplicates: true })
 }
 
 /**
@@ -25,22 +27,26 @@ exports.fetchProvinces = async () => {
 exports.fetchCities = async () => {
   await exports.fetchProvinces()
 
+  const count = await Province.count()
+  let index = 0
   let hasNext = true
   let after
-  const limit = 11
   while (hasNext) {
     const r = await Province.paginate({ limit, after })
+    const rows = []
     for (let i = 0; i < r.results.length; i++) {
-      const { dataValues: { code: provinceCode } } = r.results[i]
+      const { dataValues: {
+        name: provinceName,
+        code: provinceCode } } = r.results[i]
+      index++
+      console.log(`[${index}/${count}]正在抓取地级数据,当前省级:${provinceCode} ${provinceName}`)
       const o = await crawler.fetchCities(provinceCode)
       for (const code in o) {
         const name = o[code]
-        await City.findOrCreate({
-          where: { code },
-          defaults: { code, name, provinceCode }
-        })
+        rows.push({ code, name, provinceCode })
       }
     }
+    await City.bulkCreate(rows, { ignoreDuplicates: true })
 
     hasNext = r.cursors.hasNext
     after = r.cursors.after
@@ -55,24 +61,29 @@ exports.fetchCities = async () => {
 exports.fetchAreas = async () => {
   await exports.fetchCities()
 
+  const count = await City.count()
+  let index = 0
   let hasNext = true
   let after
-  const limit = 10
   while (hasNext) {
     const r = await City.paginate({ limit, after })
+    const rows = []
     for (let i = 0; i < r.results.length; i++) {
-      const { dataValues: { code: cityCode, provinceCode } } = r.results[i]
+      const { dataValues: {
+        name: cityName,
+        code: cityCode,
+        provinceCode } } = r.results[i]
+      index++
+      console.log(`[${index}/${count}]正在抓取县级数据,当前地级:${cityCode} ${cityName}`)
       // 特殊处理:广东省中山市(4420)、广东省东莞市(4419)、海南省儋州市(4604)没有县级
       if (['4420', '4419', '4604'].includes(cityCode)) continue
       const o = await crawler.fetchAreas(cityCode)
       for (const code in o) {
         const name = o[code]
-        await Area.findOrCreate({
-          where: { code },
-          defaults: { code, name, cityCode, provinceCode }
-        })
+        rows.push({ code, name, cityCode, provinceCode })
       }
     }
+    await Area.bulkCreate(rows, { ignoreDuplicates: true })
 
     hasNext = r.cursors.hasNext
     after = r.cursors.after
@@ -87,17 +98,21 @@ exports.fetchAreas = async () => {
 exports.fetchStreets = async () => {
   await exports.fetchAreas()
 
+  const count = await Area.count()
+  let index = 0
   let hasNext = true
   let after
-  const limit = 10
   while (hasNext) {
     const r = await Area.paginate({ limit, after })
+    const rows = []
     for (let i = 0; i < r.results.length; i++) {
       const { dataValues: {
         name: areaName,
         code: areaCode,
         cityCode,
         provinceCode } } = r.results[i]
+      index++
+      console.log(`[${index}/${count}]正在抓取乡级数据,当前县级:${areaCode} ${areaName}`)
       // 特殊处理:名为市辖区的县级没有乡级
       // 1. 福建省泉州市金门县(350527)也没有乡级
       // 2. 甘肃省嘉峪关市下仅一个县级名为市辖区(code: 620201),
@@ -106,12 +121,10 @@ exports.fetchStreets = async () => {
       const o = await crawler.fetchStreets(areaCode)
       for (const code in o) {
         const name = o[code]
-        await Street.findOrCreate({
-          where: { code },
-          defaults: { code, name, areaCode, cityCode, provinceCode }
-        })
+        rows.push({ code, name, areaCode, cityCode, provinceCode })
       }
     }
+    await Street.bulkCreate(rows, { ignoreDuplicates: true })
 
     hasNext = r.cursors.hasNext
     after = r.cursors.after
@@ -126,26 +139,29 @@ exports.fetchStreets = async () => {
 exports.fetchVillages = async () => {
   await exports.fetchStreets()
 
+  const count = await Street.count()
+  let index = 0
   let hasNext = true
   let after
-  const limit = 10
   while (hasNext) {
     const r = await Street.paginate({ limit, after })
+    const rows = []
     for (let i = 0; i < r.results.length; i++) {
       const { dataValues: {
+        name: streetName,
         code: streetCode,
         areaCode,
         cityCode,
         provinceCode } } = r.results[i]
+      index++
+      console.log(`[${index}/${count}]正在抓取村级数据,当前乡级:${streetCode} ${streetName}`)
       const o = await crawler.fetchVillages(streetCode)
       for (const code in o) {
         const name = o[code]
-        await Village.findOrCreate({
-          where: { code },
-          defaults: { code, name, streetCode, areaCode, cityCode, provinceCode }
-        })
+        rows.push({ code, name, streetCode, areaCode, cityCode, provinceCode })
       }
     }
+    await Village.bulkCreate(rows, { ignoreDuplicates: true })
 
     hasNext = r.cursors.hasNext
     after = r.cursors.after