worker.js 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. const crawler = require('./crawler')
  2. const { Province, City, Area, Street, Village } = require('./sqlite')
  3. const limit = 100
  4. /**
  5. * 抓取所有省级数据
  6. * @author https://github.com/modood
  7. * @datetime 2018-01-31 22:11
  8. */
  9. exports.fetchProvinces = async () => {
  10. console.log('[1/1]正在抓取省级数据...')
  11. const o = await crawler.fetchProvinces()
  12. const rows = []
  13. for (const code in o) {
  14. const name = o[code]
  15. rows.push({ code, name })
  16. }
  17. await Province.bulkCreate(rows, { ignoreDuplicates: true })
  18. }
  19. /**
  20. * 抓取所有地级数据
  21. * @author https://github.com/modood
  22. * @datetime 2018-01-31 22:13
  23. */
  24. exports.fetchCities = async () => {
  25. await exports.fetchProvinces()
  26. const count = await Province.count()
  27. let index = 0
  28. let hasNext = true
  29. let after
  30. while (hasNext) {
  31. const r = await Province.paginate({ limit, after })
  32. const rows = []
  33. for (let i = 0; i < r.results.length; i++) {
  34. const { dataValues: {
  35. name: provinceName,
  36. code: provinceCode } } = r.results[i]
  37. index++
  38. console.log(`[${index}/${count}]正在抓取地级数据,当前省级:${provinceCode} ${provinceName}`)
  39. const o = await crawler.fetchCities(provinceCode)
  40. for (const code in o) {
  41. const name = o[code]
  42. rows.push({ code, name, provinceCode })
  43. }
  44. }
  45. await City.bulkCreate(rows, { ignoreDuplicates: true })
  46. hasNext = r.cursors.hasNext
  47. after = r.cursors.after
  48. }
  49. }
  50. /**
  51. * 获取所有县级数据
  52. * @author https://github.com/modood
  53. * @datetime 2018-02-01 09:12
  54. */
  55. exports.fetchAreas = async () => {
  56. await exports.fetchCities()
  57. const count = await City.count()
  58. let index = 0
  59. let hasNext = true
  60. let after
  61. while (hasNext) {
  62. const r = await City.paginate({ limit, after })
  63. const rows = []
  64. for (let i = 0; i < r.results.length; i++) {
  65. const { dataValues: {
  66. name: cityName,
  67. code: cityCode,
  68. provinceCode } } = r.results[i]
  69. index++
  70. console.log(`[${index}/${count}]正在抓取县级数据,当前地级:${cityCode} ${cityName}`)
  71. // 特殊处理:广东省中山市(3320)、广东省东莞市(4419)、海南省儋州市(4604)没有县级
  72. if (['4420', '4419', '4604'].includes(cityCode)) continue
  73. const o = await crawler.fetchAreas(cityCode)
  74. for (const code in o) {
  75. const name = o[code]
  76. rows.push({ code, name, cityCode, provinceCode })
  77. }
  78. }
  79. await Area.bulkCreate(rows, { ignoreDuplicates: true })
  80. hasNext = r.cursors.hasNext
  81. after = r.cursors.after
  82. }
  83. }
  84. /**
  85. * 获取所有乡级数据
  86. * @author https://github.com/modood
  87. * @datetime 2018-02-01 09:28
  88. */
  89. exports.fetchStreets = async () => {
  90. await exports.fetchAreas()
  91. const count = await Area.count()
  92. let index = 0
  93. let hasNext = true
  94. let after
  95. while (hasNext) {
  96. const r = await Area.paginate({ limit, after })
  97. const rows = []
  98. for (let i = 0; i < r.results.length; i++) {
  99. const { dataValues: {
  100. name: areaName,
  101. code: areaCode,
  102. cityCode,
  103. provinceCode } } = r.results[i]
  104. index++
  105. console.log(`[${index}/${count}]正在抓取乡级数据,当前县级:${areaCode} ${areaName}`)
  106. // 特殊处理:名为市辖区的县级没有乡级
  107. // 1. 福建省泉州市金门县(350527)也没有乡级
  108. // 2. 甘肃省嘉峪关市下仅一个县级名为市辖区(code: 620201),
  109. // 但是它有乡级,因此也需要特殊处理
  110. if (areaName === '市辖区' || ['350527'].includes(areaCode)) continue
  111. const o = await crawler.fetchStreets(areaCode)
  112. for (const code in o) {
  113. const name = o[code]
  114. rows.push({ code, name, areaCode, cityCode, provinceCode })
  115. }
  116. }
  117. await Street.bulkCreate(rows, { ignoreDuplicates: true })
  118. hasNext = r.cursors.hasNext
  119. after = r.cursors.after
  120. }
  121. }
  122. /**
  123. * 抓取所有村级数据
  124. * @author https://github.com/modood
  125. * @datetime 2018-02-01 09:47
  126. */
  127. exports.fetchVillages = async () => {
  128. await exports.fetchStreets()
  129. const count = await Street.count()
  130. let index = 0
  131. let hasNext = true
  132. let after
  133. while (hasNext) {
  134. const r = await Street.paginate({ limit, after })
  135. const rows = []
  136. for (let i = 0; i < r.results.length; i++) {
  137. const { dataValues: {
  138. name: streetName,
  139. code: streetCode,
  140. areaCode,
  141. cityCode,
  142. provinceCode } } = r.results[i]
  143. index++
  144. console.log(`[${index}/${count}]正在抓取村级数据,当前乡级:${streetCode} ${streetName}`)
  145. const o = await crawler.fetchVillages(streetCode)
  146. for (const code in o) {
  147. const name = o[code]
  148. rows.push({ code, name, streetCode, areaCode, cityCode, provinceCode })
  149. }
  150. }
  151. await Village.bulkCreate(rows, { ignoreDuplicates: true })
  152. hasNext = r.cursors.hasNext
  153. after = r.cursors.after
  154. }
  155. }