-
Notifications
You must be signed in to change notification settings - Fork 2
Open
Labels
Description
目标模块
Lines 6 to 206 in c34afa8
/**
 * Crawls the HuoDongXing event list (tag `IT互联网`, every city) page by page.
 *
 * Each item comes from `eventList()`; its `start` text is split on `-` and
 * both halves are converted with `makeDate()` into `start` & `end`.
 * NOTE(review): this assumes the date text contains exactly one `-` as the
 * range separator — confirm against the live page markup.
 *
 * @param all - when truthy, the `status` query parameter is sent empty;
 *              otherwise it is sent as `'1'`
 * @yields one `Event` per list item, until a page yields no item at all
 */
export async function* HuoDongXing(all?: boolean) {
    for (let page = 1; ; page++) {
        // Named `URI` (as in the sibling crawlers) so the global `URL` class isn't shadowed
        const URI =
            'https://www.huodongxing.com/eventlist?' +
            new URLSearchParams({
                orderby: 'n',
                status: all ? '' : '1',
                tag: 'IT互联网',
                city: '全部',
                page: page + ''
            });
        // Must start as `true` on EVERY page: a flag declared in the `for` initialiser
        // keeps the `false` written by an earlier page, so the loop would never end
        let empty = true;

        for await (const item of eventList(
            URI,
            '.search-tab-content-list .search-tab-content-item',
            '.item-title',
            '.item-data',
            '.item-dress',
            '.item-logo',
            '.item-title'
        )) {
            empty = false;

            const [start, end] = (item.start as string).split('-');

            yield {
                ...item,
                start: makeDate(start),
                end: makeDate(end)
            } as Event;
        }
        // A page without any item means the listing is exhausted
        if (empty) break;
    }
}
/**
 * Crawls the SegmentFault event list page by page.
 *
 * The first 3 characters of the scraped `start` & `address` texts are dropped
 * before use (presumably a label prefix — confirm against the page markup).
 * `end` is always `null`.
 *
 * @param all - when falsy, the generator finishes at the first event whose
 *              start date is earlier than the current time
 * @yields one `Event` per list item, until a page yields no item at all
 */
export async function* SegmentFault(all?: boolean) {
    for (let page = 1; ; page++) {
        // Named `URI` (as in the sibling crawlers) so the global `URL` class isn't shadowed
        const URI = 'https://segmentfault.com/events?page=' + page,
            now = new Date();
        // Must start as `true` on EVERY page: a flag declared in the `for` initialiser
        // keeps the `false` written by an earlier page, so the loop would never end
        let empty = true;

        for await (const item of eventList(
            URI,
            '.all-event-list .widget-event',
            '.title',
            '.widget-event__meta > :first-child',
            '.widget-event__meta > :last-child',
            '.widget-event__banner',
            '.title > a'
        )) {
            empty = false;

            const start = makeDate((item.start as string).slice(3));

            // Ends the whole crawl, not just this page
            if (!all && now > start) return;

            yield {
                ...item,
                start,
                end: null,
                address: (item.address as string).slice(3)
            } as Event;
        }
        // A page without any item means the listing is exhausted
        if (empty) break;
    }
}
/**
 * Crawls the JueJin event API (`getEventList`, ordered by `startTime`),
 * one page per request.
 *
 * Every requested URI that returned data is written out with `console.warn()`.
 *
 * @param all - when falsy, the generator finishes at the first event whose
 *              start date is earlier than the time the page was requested
 * @yields one `Event` per entry of the response's `d` list, until that list
 *         is missing or has no first element
 */
export async function* JueJin(all?: boolean) {
    let page = 1;

    while (true) {
        const query = new URLSearchParams({
            src: 'web',
            orderType: 'startTime',
            pageNum: `${page}`
        });
        const URI = `https://event-storage-api-ms.juejin.im/v2/getEventList?${query}`;
        const now = new Date();

        const response = await fetch(URI);
        const { d: list } = await response.json();

        if (!list?.[0]) break;

        console.warn(URI);

        for (const entry of list) {
            const start = makeDate(entry.startTime);
            const end = makeDate(entry.endTime);

            // Ends the whole crawl, not just this page
            if (!all && now > start) return;

            yield {
                title: entry.title,
                start,
                end,
                address: entry.city,
                tags: entry.tagInfo.map(tag => tag.title),
                summary: entry.content,
                link: new URL(entry.eventUrl),
                banner: new URL(entry.screenshot)
            } as Event;
        }
        page++;
    }
}
/**
 * Crawls the BagEvent event list.
 *
 * The list page is loaded once to read the `param` object declared by an
 * inline `<script>` in its `<head>`; the `paramMap`, `imgDomain` & `mainDomain`
 * found there drive the paged `loadSearchEventList.do` requests.
 * Every requested URI that returned data is written out with `console.warn()`.
 *
 * @param all - when falsy, the generator finishes at the first event whose
 *              start date is earlier than the time the page was requested
 * @yields one `Event` per list entry, until a response carries no entry
 * @throws {TypeError} when no inline script declaring `var param` is found
 */
export async function* BaiGe(all?: boolean) {
    const {
        window: {
            document: { head }
        }
    } = await JSDOM.fromURL(
        'https://www.bagevent.com/eventlist.html?f=1&tag=17&r=orderByNew'
    );
    const paramScript = [
        ...head.querySelectorAll<HTMLScriptElement>('script:not(:empty)')
    ].find(({ text }) => /var param = \{[\s\S]+\}/.test(text));

    // Fail with a meaningful message instead of "Cannot read properties of undefined"
    if (!paramScript)
        throw new TypeError(
            'No inline script declaring `var param` was found in the BagEvent list page'
        );

    // SECURITY NOTE(review): this executes script text downloaded from a remote page
    // inside the crawler's own process. Kept as is, because the page exposes `param`
    // as JavaScript source only; consider running it in a sandbox instead.
    const { paramMap, imgDomain, mainDomain } = new Function(`${paramScript.text}
return param;`)();

    for (let page = 1; ; page++) {
        paramMap.pagingPage = page;

        const URI = `${mainDomain}/load/loadSearchEventList.do?${new URLSearchParams(
                paramMap
            )}`,
            now = new Date();
        const { list } = (await (await fetch(URI)).json()).resultObject
            .valueList;

        if (!list?.[0]) break;

        console.warn(URI);

        for (const { event_name, start_time, address, logo, event_id } of list) {
            const start = makeDate(start_time);

            // Ends the whole crawl, not just this page
            if (!all && now > start) return;

            yield {
                title: event_name,
                start,
                address,
                banner: imgDomain + logo,
                link: new URL(mainDomain + '/event/' + event_id)
            } as Event;
        }
    }
}
/**
 * Crawls the OSChina event list through its `get_more_event_list` AJAX
 * end-point: each page is requested with `POST`, and the returned markup is
 * parsed with `JSDOM` before being handed to `eventList()`.
 * `end` is always `null`.
 *
 * @param all - when falsy, the generator finishes at the first event whose
 *              start date is earlier than the time the page was requested
 * @yields one `Event` per list item, until a page yields no item at all
 */
export async function* OSChina(all?: boolean) {
    for (let page = 1; ; page++) {
        // Named `URI` (as in the sibling crawlers) so the global `URL` class isn't shadowed
        const body = new URLSearchParams({
                tab: 'latest',
                time: 'all',
                p: page + ''
            }),
            URI = 'https://www.oschina.net/action/ajax/get_more_event_list',
            now = new Date();
        // Must start as `true` on EVERY page: a flag declared in the `for` initialiser
        // keeps the `false` written by an earlier page, so the loop would never end
        let empty = true;

        const data = await (await fetch(URI, { method: 'POST', body })).text();

        for await (const item of eventList(
            // The document URL given to JSDOM is the end-point plus the posted query
            new JSDOM(data, { url: URI + '?' + body }),
            '.event-item',
            '.summary',
            '.when-where > label:first-of-type',
            '.when-where > label:last-of-type',
            '.item-banner img',
            '.item-banner > a'
        )) {
            empty = false;

            const start = makeDate(item.start as string);

            // Ends the whole crawl, not just this page
            if (!all && now > start) return;

            yield { ...item, start, end: null } as Event;
        }
        // A page without any item means the listing is exhausted
        if (empty) break;
    }
}
继承基类
IT-events/source/core/Crawler.ts
Lines 11 to 39 in c34afa8
/**
 * Base class of crawlers which collect items of type `T` from a URI
 * and save them as an `index.yml` file.
 */
export abstract class DataCrawler<T> {
    declare ['constructor']: typeof DataCrawler;

    static baseURI = '';
    static schema: URLPattern;

    /** Produces, one by one, the items found at `URI`. */
    abstract getList(URI: string): AsyncGenerator<T>;

    /** Resolves the single item found at `URI`. */
    abstract getItem(URI: string): Promise<T>;

    /**
     * Serializes the collected items with `stringify()`;
     * sub-classes may override it to save more data.
     */
    makeYAML(list: T[]) {
        const text = stringify(list);

        return text;
    }

    /**
     * Collects every item of `getList(URI)`, logging each one, then saves
     * them as `index.yml` under `<working directory>/temp/<path of the URI>`.
     *
     * @returns the collected `list`, the target `folder` & the result of `saveFile()`
     */
    @logTime
    async saveList(URI: string) {
        const list: T[] = [];
        const root = process.cwd();
        const { pathname } = new URL(URI);
        const folder = join(root, 'temp', pathname);

        for await (const data of this.getList(URI)) {
            console.log(data);
            list.push(data);
        }
        const content = this.makeYAML(list);
        const file = await saveFile(content, folder, 'index.yml');

        return { list, folder, file };
    }
}
参考实现
IT-events/source/Agenda/core.ts
Lines 22 to 83 in c34afa8
/**
 * Base class of agenda crawlers: besides the agenda list itself, it keeps
 * the `mentors` & `forums` collected on the way, and saves all of them as
 * YAML, CSV & avatar image files.
 */
export abstract class AgendaCrawler<
    A extends Agenda = Agenda,
    M extends Mentor = Mentor,
    F extends Forum = Forum
> extends DataCrawler<A> {
    mentors: M[] = [];
    forums: F[] = [];

    /** Serializes `mentors`, `forums` & the given agendas into one document. */
    override makeYAML(agendas: A[]) {
        return stringify({
            mentors: this.mentors,
            forums: this.forums,
            agendas
        });
    }

    /**
     * Builds the rows of the 3 CSV tables; in each agenda row, the `mentor`
     * & `forum` objects are replaced with their `name`.
     */
    makeCSV(agendas: A[]) {
        const { mentors, forums } = this;

        return {
            mentors,
            forums,
            agendas: agendas.map(({ mentor, forum, ...agenda }) => ({
                ...agenda,
                mentor: mentor?.name,
                forum: forum?.name
            }))
        };
    }

    /** Saves every table of `makeCSV()` as `<table name>.csv` in `folder`. */
    @logTime
    async saveCSV(agendas: A[], folder: string) {
        for (const [name, rows] of Object.entries(this.makeCSV(agendas)))
            await saveFile(stringifyCSV(rows), folder, `${name}.csv`);
    }

    /**
     * Downloads the avatar of every mentor having one into `<folder>/image/`,
     * named after the avatar URL with the detected file extension.
     *
     * Avatars which can't be downloaded successfully, or whose file type
     * can't be detected, are skipped with a warning, so that a single broken
     * image does not abort the whole saving.
     */
    @logTime
    async saveImagesTo(folder: string) {
        for (const { avatar } of this.mentors) {
            if (!avatar) continue;

            const response = await fetch(avatar);

            // The body of a failed response is an error page, not an image
            if (!response.ok) {
                console.warn(
                    `Skipped avatar ${avatar}: HTTP ${response.status}`
                );
                continue;
            }
            const buffer = Buffer.from(await response.arrayBuffer());
            // Presumably `fromBuffer()` of the `file-type` package, which resolves
            // to `undefined` for unrecognized data — destructuring it directly throws
            const type = await fromBuffer(buffer);

            if (!type) {
                console.warn(`Skipped avatar ${avatar}: unknown file type`);
                continue;
            }
            await saveFile(
                buffer,
                folder,
                'image',
                `${parse(avatar).name}.${type.ext}`
            );
        }
    }

    /** Saves the YAML file as the base class does, then the CSV & image files. */
    override async saveList(URI: string) {
        const { list, folder, file } = await super.saveList(URI);

        await this.saveCSV(list, folder);
        await this.saveImagesTo(folder);

        return { list, folder, file };
    }
}