// worker.js
  1. const crawler = require('./crawler')
  2. const { Province, City, Area, Street, Village } = require('./sqlite')
  3. // 每抓取 100 个页面再批量写入数据库
  4. const limit = 100
  5. /**
  6. * 抓取所有省级数据
  7. * @author https://github.com/modood
  8. * @datetime 2018-01-31 22:11
  9. */
  10. exports.fetchProvinces = async () => {
  11. console.log('[1/1]正在抓取省级数据...')
  12. const o = await crawler.fetchProvinces()
  13. const rows = []
  14. for (const code in o) {
  15. const name = o[code]
  16. rows.push({ code, name })
  17. }
  18. await Province.bulkCreate(rows, { ignoreDuplicates: true })
  19. }
  20. /**
  21. * 抓取所有地级数据
  22. * @author https://github.com/modood
  23. * @datetime 2018-01-31 22:13
  24. */
  25. exports.fetchCities = async () => {
  26. await exports.fetchProvinces()
  27. const count = await Province.count()
  28. let index = 0
  29. let hasNext = true
  30. let after
  31. while (hasNext) {
  32. const r = await Province.paginate({ limit, after })
  33. const rows = []
  34. for (let i = 0; i < r.results.length; i++) {
  35. const { dataValues: {
  36. name: provinceName,
  37. code: provinceCode } } = r.results[i]
  38. index++
  39. console.log(`[${index}/${count}]正在抓取地级数据,当前省级:${provinceCode} ${provinceName}`)
  40. const o = await crawler.fetchCities(provinceCode)
  41. for (const code in o) {
  42. const name = o[code]
  43. rows.push({ code, name, provinceCode })
  44. }
  45. }
  46. await City.bulkCreate(rows, { ignoreDuplicates: true })
  47. hasNext = r.cursors.hasNext
  48. after = r.cursors.after
  49. }
  50. }
  51. /**
  52. * 获取所有县级数据
  53. * @author https://github.com/modood
  54. * @datetime 2018-02-01 09:12
  55. */
  56. exports.fetchAreas = async () => {
  57. await exports.fetchCities()
  58. const count = await City.count()
  59. let index = 0
  60. let hasNext = true
  61. let after
  62. while (hasNext) {
  63. const r = await City.paginate({ limit, after })
  64. const rows = []
  65. for (let i = 0; i < r.results.length; i++) {
  66. const { dataValues: {
  67. name: cityName,
  68. code: cityCode,
  69. provinceCode } } = r.results[i]
  70. index++
  71. console.log(`[${index}/${count}]正在抓取县级数据,当前地级:${cityCode} ${cityName}`)
  72. // 特殊处理:广东省中山市(3320)、广东省东莞市(4419)、海南省儋州市(4604)没有县级。
  73. if (['4420', '4419', '4604'].includes(cityCode)) continue
  74. const o = await crawler.fetchAreas(cityCode)
  75. for (const code in o) {
  76. const name = o[code]
  77. rows.push({ code, name, cityCode, provinceCode })
  78. }
  79. }
  80. await Area.bulkCreate(rows, { ignoreDuplicates: true })
  81. hasNext = r.cursors.hasNext
  82. after = r.cursors.after
  83. }
  84. // 特殊处理:广东省中山市(3320)、广东省东莞市(4419)、海南省儋州市(4604)没有县级,
  85. // 需要手动插入。
  86. await Area.bulkCreate([
  87. { code: '441900', name: '东莞市', cityCode: '4419', provinceCode: '44' },
  88. { code: '442000', name: '中山市', cityCode: '4420', provinceCode: '44' },
  89. { code: '460400', name: '儋州市', cityCode: '4604', provinceCode: '46' }
  90. ], { ignoreDuplicates: true })
  91. // 特殊处理:甘肃省嘉峪关市下仅一个县级名为市辖区(code: 620201),重命名。
  92. await Area.update({ name: '嘉峪关市' }, { where: { code: '620201' } })
  93. }
  94. /**
  95. * 获取所有乡级数据
  96. * @author https://github.com/modood
  97. * @datetime 2018-02-01 09:28
  98. */
  99. exports.fetchStreets = async () => {
  100. await exports.fetchAreas()
  101. const count = await Area.count()
  102. let index = 0
  103. let hasNext = true
  104. let after
  105. while (hasNext) {
  106. const r = await Area.paginate({ limit, after })
  107. const rows = []
  108. for (let i = 0; i < r.results.length; i++) {
  109. const { dataValues: {
  110. name: areaName,
  111. code: areaCode,
  112. cityCode,
  113. provinceCode } } = r.results[i]
  114. index++
  115. console.log(`[${index}/${count}]正在抓取乡级数据,当前县级:${areaCode} ${areaName}`)
  116. // 特殊处理:名为市辖区的县级没有乡级
  117. // 1. 福建省泉州市金门县(350527)也没有乡级
  118. // 2. 甘肃省嘉峪关市下仅一个县级名为市辖区(code: 620201),
  119. // 但是它有乡级,因此在抓取县级的时候已经重命名
  120. if (areaName === '市辖区' || ['350527'].includes(areaCode)) continue
  121. // 特殊处理:广东省中山市(3320)、广东省东莞市(4419)、海南省儋州市(4604)的乡级
  122. // 页面的路由比较特别,需要手动拼接。
  123. let route
  124. if (['4420', '4419', '4604'].includes(cityCode)) route = `${provinceCode}/${cityCode}`
  125. const o = await crawler.fetchStreets(areaCode, route)
  126. for (const code in o) {
  127. const name = o[code]
  128. rows.push({ code, name, areaCode, cityCode, provinceCode })
  129. }
  130. }
  131. await Street.bulkCreate(rows, { ignoreDuplicates: true })
  132. hasNext = r.cursors.hasNext
  133. after = r.cursors.after
  134. }
  135. }
  136. /**
  137. * 抓取所有村级数据
  138. * @author https://github.com/modood
  139. * @datetime 2018-02-01 09:47
  140. */
  141. exports.fetchVillages = async () => {
  142. await exports.fetchStreets()
  143. const count = await Street.count()
  144. let index = 0
  145. let hasNext = true
  146. let after
  147. while (hasNext) {
  148. const r = await Street.paginate({ limit, after })
  149. const rows = []
  150. for (let i = 0; i < r.results.length; i++) {
  151. const { dataValues: {
  152. name: streetName,
  153. code: streetCode,
  154. areaCode,
  155. cityCode,
  156. provinceCode } } = r.results[i]
  157. index++
  158. console.log(`[${index}/${count}]正在抓取村级数据,当前乡级:${streetCode} ${streetName}`)
  159. // 特殊处理:广东省中山市(3320)、广东省东莞市(4419)、海南省儋州市(4604)的村级
  160. // 页面的路由比较特别,需要手动拼接。
  161. let route
  162. const cCodeSuffix = cityCode.substr(2, 2)
  163. if (['4420', '4419', '4604'].includes(cityCode)) route = `${provinceCode}/${cCodeSuffix}/${streetCode}`
  164. const o = await crawler.fetchVillages(streetCode, route)
  165. for (const code in o) {
  166. const name = o[code]
  167. rows.push({ code, name, streetCode, areaCode, cityCode, provinceCode })
  168. }
  169. }
  170. await Village.bulkCreate(rows, { ignoreDuplicates: true })
  171. hasNext = r.cursors.hasNext
  172. after = r.cursors.after
  173. }
  174. }