欢迎您访问 最编程 本站为您分享编程语言代码,编程技术文章!
您现在的位置是: 首页

nodejs 抓取全国固定电话区号(使用固定电话区号 json)

最编程 2024-04-23 12:41:33
...

背景:需要座机区号,但是没有现成的json,遂决定去爬一个。

爬那种年久失修、被遗忘的网站,一般遇到的比较棘手的问题就是编码问题,比如gbk、gb2312这种。因为选的网络请求依赖设置不对,有时候拿到的就是乱码,用iconv也解不出来。
然后咧,仔细查阅了一下superagent这个用起来比较顺手的库的文档。

其实人家是可以设置编码方式的,不需要自己傻傻地折腾iconv。


在这里有写

感兴趣的自己去看 https://www.npmjs.com/package/superagent#browser

首先就是装依赖

npm install superagent
npm i superagent-charset --save
npm install cheerio

我就只取了想要的电话区号,还用了个特丑的正则- -
有知道的可以告诉我咋修改得好看一些

以下是完整代码

// HTTP client used by task() to download each page.
const superagent = require('superagent');
// superagent-charset patches superagent so that requests gain the
// .charset() method that task() uses to decode gb2312 responses.
require('superagent-charset')(superagent);
// HTML parser with a jQuery-like selector API, used by task().
let cheerio = require('cheerio');
// Used by main() to write the result file.
let fs = require('fs');
// Current page number, starting at 1: logged by task() and advanced by main().
let num = 1;
/**
 * Fetch one page and extract the area codes listed in it.
 *
 * The page is requested with superagent and decoded as gb2312 (the
 * `.charset()` method comes from superagent-charset), then parsed with
 * cheerio. For every `#contents` element, each run of 3-4 digits that is
 * immediately followed by an ASCII comma is collected.
 *
 * NOTE: only the 3-4 digits directly before the comma are taken, so a longer
 * digit run contributes just its trailing four digits (e.g. "09401," yields
 * "9401").
 *
 * @param {string} href - URL of the page to scrape.
 * @returns {Promise<string[]>} Codes in document order, duplicates kept.
 *   Rejects if the request fails.
 */
const task = async function (href) {
    const res = await superagent.get(href).charset('gb2312');
    const $ = cheerio.load(res.text);
    // `num` is the module-level page counter; it is logged for progress only.
    console.log(num, href);

    const data = [];
    $('#contents').each((index, element) => {
        const content = $(element).text();
        // String.prototype.match returns null (not an empty array) when
        // nothing matches; fall back to [] so an element without any codes is
        // skipped instead of throwing. The lookahead keeps the comma out of
        // the match, so no post-processing is needed.
        const codes = content.match(/\d{3,4}(?=,)/g) || [];
        data.push(...codes);
    });
    return data;
};

/**
 * Scrape every page up to page 19, de-duplicate the collected area codes and
 * write them to `./res.js` as an ES-module default export.
 *
 * Pages are fetched one at a time, in order, starting from the current value
 * of the module-level `num` counter.
 *
 * @returns {Promise<void>} Resolves once the file has been written. Rejects
 *   if any page request or the file write fails.
 */
const main = async function () {
    const collected = [];
    // `num` is shared with task(), which logs it, so the module-level counter
    // is advanced here rather than a local loop index.
    while (num <= 19) {
        const pageCodes = await task(`https://doc.wendoc.com/b0ac7f2bf0ad4e6ccae1b80674d2fd6eb62f3e75b-${num}.html`);
        collected.push(...pageCodes);
        num++;
    }
    // A Set keeps first-seen order while dropping repeated codes.
    const unique = Array.from(new Set(collected));
    // Await the write so that an I/O error rejects main() instead of being
    // silently dropped by an empty callback.
    await fs.promises.writeFile('./res.js', 'export default' + JSON.stringify(unique));
};
// Start the scrape. main() returns a promise, so attach a handler: a failure
// is reported and reflected in the exit code instead of surfacing as an
// unhandled rejection.
main().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

爬取的全国座机区号

export default [
    '010',
    '021',
    '022',
    '023',
    '0551',
    '0564',
    '0563',
    '0562',
    '0561',
    '0559',
    '0558',
    '0557',
    '0556',
    '0555',
    '0554',
    '0553',
    '0552',
    '0550',
    '0566',
    '0565',
    '0591',
    '0592',
    '0593',
    '0594',
    '0595',
    '0596',
    '0597',
    '0598',
    '0599',
    '0931',
    '0930',
    '0932',
    '0933',
    '0993',
    '0934',
    '0935',
    '0936',
    '0937',
    '0938',
    '0939',
    '0943',
    '9401',
    '9402',
    '9403',
    '9404',
    '9405',
    '9406',
    '9407',
    '9411',
    '9412',
    '9413',
    '9414',
    '9415',
    '9416',
    '9417',
    '9418',
    '9421',
    '9422',
    '9423',
    '9424',
    '9425',
    '9426',
    '9441',
    '9442',
    '9443',
    '9444',
    '9445',
    '9446',
    '9447',
    '9491',
    '9492',
    '9493',
    '9494',
    '9495',
    '9496',
    '9497',
    '9498',
    '020',
    '0660',
    '0661',
    '0662',
    '0663',
    '0668',
    '0750',
    '0751',
    '0752',
    '0753',
    '0754',
    '0755',
    '0756',
    '0757',
    '0758',
    '0759',
    '0760',
    '0762',
    '0763',
    '0765',
    '0766',
    '0768',
    '0769',
    '0771',
    '0770',
    '0772',
    '0773',
    '0774',
    '0775',
    '0776',
    '0777',
    '0778',
    '0779',
    '0851',
    '0852',
    '0853',
    '0854',
    '0855',
    '0856',
    '0857',
    '0858',
    '0859',
    '8631',
    '8632',
    '8633',
    '8634',
    '8635',
    '8640',
    '8641',
    '8642',
    '8643',
    '8644',
    '8645',
    '8646',
    '8647',
    '8648',
    '8649',
    '8650',
    '8651',
    '8652',
    '8653',
    '8654',
    '8655',
    '8656',
    '8657',
    '8658',
    '8659',
    '8661',
    '8662',
    '8663',
    '8664',
    '8665',
    '8666',
    '8667',
    '8668',
    '8669',
    '8670',
    '8671',
    '8672',
    '8673',
    '8674',
    '8675',
    '8676',
    '8677',
    '8680',
    '8681',
    '8682',
    '8686',
    '8687',
    '8688',
    '8689',
    '0898',
    '0899',
    '0890',
    '0311',
    '0312',
    '0313',
    '0314',
    '0315',
    '0316',
    '0317',
    '0318',
    '0319',
    '0310',
    '0335',
    '0371',
    '0370',
    '0372',
    '0373',
    '0374',
    '0375',
    '0376',
    '0377',
    '0378',
    '0379',
    '0391',
    '0392',
    '0393',
    '0394',
    '0395',
    '0396',
    '0397',
    '0398',
    '0451',
    '0452',
    '0453',
    '0454',
    '0458',
    '0456',
    '0457',
    '0459',
    '027',
    '0710',
    '0711',
    '0712',
    '0713',
    '0714',
    '0715',
    '0716',
    '0717',
    '0718',
    '0719',
    '0722',
    '0727',
    '0728',
    '0731',
    '0730',
    '0732',
    '0733',
    '0734',
    '0735',
    '0736',
    '0737',
    '0738',
    '0739',
    '0743',
    '0744',
    '0745',
    '0746',
    '0431',
    '0432',
    '0433',
    '0434',
    '0435',
    '0436',
    '0437',
    '0438',
    '0439',
    '0440',
    '0448',
    '025',
    '0510',
    '0511',
    '0512',
    '0513',
    '0514',
    '0515',
    '0516',
    '0517',
    '0518',
    '0519',
    '0520',
    '0523',
    '0527',
    '0791',
    '0790',
    '0792',
    '0793',
    '0794',
    '0795',
    '0796',
    '0797',
    '0798',
    '0799',
    '0701',
    '024',
    '0410',
    '0411',
    '0412',
    '0413',
    '0414',
    '0415',
    '0416',
    '0417',
    '0418',
    '0419',
    '0421',
    '0427',
    '0429',
    '0471',
    '0470',
    '0472',
    '0473',
    '0474',
    '0475',
    '0476',
    '0477',
    '0478',
    '0479',
    '0482',
    '4831',
    '4887',
    '4888',
    '0951',
    '0952',
    '0953',
    '0954',
    '0971',
    '0970',
    '0972',
    '0973',
    '0974',
    '0975',
    '0976',
    '0977',
    '0978',
    '0979',
    '9820',
    '9828',
    '9831',
    '9832',
    '9833',
    '9834',
    '9835',
    '9836',
    '9837',
    '9838',
    '9839',
    '9840',
    '9841',
    '9842',
    '9843',
    '9844',
    '9846',
    '9847',
    '9848',
    '9849',
    '9851',
    '9852',
    '9853',
    '9854',
    '0531',
    '0530',
    '0532',
    '0533',
    '0534',
    '0535',
    '0536',
    '0537',
    '0538',
    '0539',
    '0543',
    '0546',
    '0631',
    '0632',
    '0633',
    '0634',
    '0635',
    '0351',
    '0349',
    '0350',
    '0352',
    '0353',
    '0354',
    '0355',
    '0356',
    '0357',
    '0358',
    '0359',
    '029',
    '0913',
    '0912',
    '0911',
    '0910',
    '0914',
    '0916',
    '0915',
    '9244',
    '9243',
    '9242',
    '9240',
    '9229',
    '9228',
    '9227',
    '9226',
    '9225',
    '9224',
    '9223',
    '9222',
    '9221',
    '9220',
    '0919',
    '0917',
    '028',
    '0812',
    '0813',
    '0816',
    '0817',
    '0818',
    '0825',
    '0826',
    '0830',
    '0831',
    '0832',
    '0833',
    '0834',
    '0835',
    '0836',
    '0837',
    '0838',
    '0839',
    '8225',
    '8228',
    '8229',
    '8241',
    '8247',
    '8270',
    '8277',
    '8278',
    '8279',
    '8295',
    '8296',
    '8298',
    '8407',
    '8411',
    '8417',
    '8430',
    '7437',
    '8440',
    '8444',
    '8445',
    '8447',
    '8452',
    '8453',
    '8455',
    '8456',
    '8457',
    '8458',
    '8459',
    '8461',
    '8462',
    '8463',
    '8465',
    '8466',
    '8489',
    '8493',
    '0891',
    '0892',
    '0894',
    '0895',
    '8015',
    '8016',
    '8017',
    '8018',
    '8040',
    '8049',
    '8051',
    '8054',
    '8056',
    '8057',
    '8059',
    '8061',
    '8062',
    '0896',
    '8067',
    '8069',
    '8073',
    '8078',
    '8081',
    '0893',
    '0991',
    '0901',
    '0902',
    '0903',
    '0906',
    '0908',
    '0909',
    '0990',
    '0992',
    '0994',
    '0995',
    '0996',
    '0997',
    '0998',
    '0999',
    '0871',
    '0870',
    '0872',
    '0873',
    '0874',
    '0875',
    '0876',
    '0877',
    '0878',
    '0879',
    '0881',
    '0883',
    '0886',
    '0887',
    '0888',
    '0691',
    '0692',
    '0571',
    '0570',
    '0572',
    '0573',
    '0574',
    '0575',
    '0576',
    '0577',
    '0578',
    '0579',
    '0580',
    '886',
    '887',
    '888',
    '889',
    '890',
    '891',
    '892',
    '893',
    '894',
    '895',
    '896',
    '897',
    '898',
    '899',
    '900',
    '901',
    '902',
    '903',
    '904',
    '905',
    '906',
    '907',
    '908',
    '909',
    '910',
    '911',
    '912',
    '913'
];