|
@@ -1,4 +1,5 @@
|
|
|
const crawler = require('./crawler')
|
|
|
+const Sequelize = require('sequelize')
|
|
|
const { Province, City, Area, Street, Village } = require('./sqlite')
|
|
|
|
|
|
// 每抓取 100 个页面再批量写入数据库
|
|
@@ -62,12 +63,19 @@ exports.fetchCities = async () => {
|
|
|
exports.fetchAreas = async () => {
|
|
|
await exports.fetchCities()
|
|
|
|
|
|
- const count = await City.count()
|
|
|
+ const fetchedCityCode = await Area.aggregate('cityCode', 'DISTINCT', { plain: false }).map(o => o.DISTINCT)
|
|
|
+ const where = { code: { [Sequelize.Op.notIn]: fetchedCityCode } }
|
|
|
+ const count = await City.count({ where })
|
|
|
+
|
|
|
+ if (count === 0) {
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
let index = 0
|
|
|
let hasNext = true
|
|
|
let after
|
|
|
while (hasNext) {
|
|
|
- const r = await City.paginate({ limit, after })
|
|
|
+ const r = await City.paginate({ where, limit, after })
|
|
|
const rows = []
|
|
|
for (let i = 0; i < r.results.length; i++) {
|
|
|
const { dataValues: {
|
|
@@ -112,12 +120,19 @@ exports.fetchAreas = async () => {
|
|
|
exports.fetchStreets = async () => {
|
|
|
await exports.fetchAreas()
|
|
|
|
|
|
- const count = await Area.count()
|
|
|
+ const fetchedAreaCode = await Street.aggregate('areaCode', 'DISTINCT', { plain: false }).map(o => o.DISTINCT)
|
|
|
+ const where = { code: { [Sequelize.Op.notIn]: fetchedAreaCode } }
|
|
|
+ const count = await Area.count({ where })
|
|
|
+
|
|
|
+ if (count === 0) {
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
let index = 0
|
|
|
let hasNext = true
|
|
|
let after
|
|
|
while (hasNext) {
|
|
|
- const r = await Area.paginate({ limit, after })
|
|
|
+ const r = await Area.paginate({ where, limit, after })
|
|
|
const rows = []
|
|
|
for (let i = 0; i < r.results.length; i++) {
|
|
|
const { dataValues: {
|
|
@@ -162,12 +177,19 @@ exports.fetchStreets = async () => {
|
|
|
exports.fetchVillages = async () => {
|
|
|
await exports.fetchStreets()
|
|
|
|
|
|
- const count = await Street.count()
|
|
|
+ const fetchedStreetCode = await Village.aggregate('streetCode', 'DISTINCT', { plain: false }).map(o => o.DISTINCT)
|
|
|
+ const where = { code: { [Sequelize.Op.notIn]: fetchedStreetCode } }
|
|
|
+ const count = await Street.count({ where })
|
|
|
+
|
|
|
+ if (count === 0) {
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
let index = 0
|
|
|
let hasNext = true
|
|
|
let after
|
|
|
while (hasNext) {
|
|
|
- const r = await Street.paginate({ limit, after })
|
|
|
+ const r = await Street.paginate({ where, limit, after })
|
|
|
const rows = []
|
|
|
for (let i = 0; i < r.results.length; i++) {
|
|
|
const { dataValues: {
|