Skip to content
GitLab
    • Explore Projects Groups Snippets
Projects Groups Snippets
  • /
  • Help
    • Help
    • Support
    • Community forum
    • Submit feedback
    • Contribute to GitLab
  • Sign in / Register
  • H headless-chrome-crawler
  • Project information
    • Project information
    • Activity
    • Labels
    • Members
  • Repository
    • Repository
    • Files
    • Commits
    • Branches
    • Tags
    • Contributors
    • Graph
    • Compare
  • Issues 29
    • Issues 29
    • List
    • Boards
    • Service Desk
    • Milestones
  • Merge requests 4
    • Merge requests 4
  • CI/CD
    • CI/CD
    • Pipelines
    • Jobs
    • Schedules
  • Deployments
    • Deployments
    • Environments
    • Releases
  • Packages and registries
    • Packages and registries
    • Package Registry
    • Infrastructure Registry
  • Monitor
    • Monitor
    • Incidents
  • Analytics
    • Analytics
    • Value stream
    • CI/CD
    • Repository
  • Wiki
    • Wiki
  • Snippets
    • Snippets
  • Activity
  • Graph
  • Create a new issue
  • Jobs
  • Commits
  • Issue Boards
Collapse sidebar
  • yujiosaka
  • headless-chrome-crawler
  • Merge requests
  • !44
An error occurred while fetching the assigned milestone of the selected merge_request.

Force more strict closure jsdoc style

  • Review changes

  • Download
  • Email patches
  • Plain diff
Merged yujiosaka requested to merge closure_jsdoc_style into master 7 years ago
  • Overview 1
  • Commits 1
  • Pipelines 0
  • Changes 6
  • Annotations obey Closure annotations
Compare
  • master (base)

and
  • latest version
    1e13d0c1
    1 commit, 2 years ago

6 files
+ 118
- 120

    Preferences

    File browser
    Compare changes
cache
base.js +15 -21
redis.js +12 -0
session.js +12 -0
lib
crawler.js +25 -12
hccrawler.js +40 -59
helper.js +14 -28
cache/base.js
+ 15
- 21
  • View file @ 1e13d0c1

  • Edit in single-file editor

  • Open in Web IDE


/**
* @interface
*/
class BaseCache {
/**
* @param {!Object} settings
*/
constructor(settings) {
this._settings = settings;
}
/**
* Initializing the cache storage
* @return {Promise} resolves when init operation completed
* @interface
* @return {Promise}
*/
init() {
throw new Error('Init is not overridden!');
}
/**
* Closing the cache storage
* @return {Promise} resolves when close operation completed
* @interface
* @return {Promise}
*/
close() {
throw new Error('Close is not overridden!');
}
/**
* Clearing the cache storage
* @return {Promise} resolves when clear operation completed
* @interface
* @return {Promise}
*/
clear() {
throw new Error('Clear is not overridden!');
}
/**
* Method to check whether the requested options already exists in the cache storage
* @param {string} key
* @return {Promise} resolves whether the requested options already exists
* @interface
* @param {!string} key
* @return {Promise}
*/
exists() {
throw new Error('Get is not overridden!');
}
/**
* Method to set the requested options to the cache storage
* @param {string} key
* @return {Promise} resolves when set operation completed
* @interface
* @param {!string} key
* @return {Promise}
*/
set() {
throw new Error('Set is not overridden!');
}
/**
* Method to remove already requested option from the cache storage
* @param {string} key
* @return {Promise} resolves when remove operation completed
* @interface
* @param {!string} key
* @return {Promise}
*/
remove() {
throw new Error('Remove is not overridden!');
cache/redis.js
+ 12
- 0
  • View file @ 1e13d0c1

  • Edit in single-file editor

  • Open in Web IDE


const BaseCache = require('./base');
const redis = require('redis');
/**
* @implements {BaseCache}
*/
class RedisCache extends BaseCache {
/**
* @override
* @return {Promise}
*/
init() {
this._client = redis.createClient(this._settings);
@@ -11,6 +15,7 @@ class RedisCache extends BaseCache {
}
/**
* @return {Promise}
* @override
*/
clear() {
@@ -26,6 +31,7 @@ class RedisCache extends BaseCache {
}
/**
* @return {Promise}
* @override
*/
close() {
@@ -34,6 +40,8 @@ class RedisCache extends BaseCache {
}
/**
* @param {!string} key
* @return {Promise}
* @override
*/
exists(key) {
@@ -49,6 +57,8 @@ class RedisCache extends BaseCache {
}
/**
* @param {!string} key
* @return {Promise}
* @override
*/
set(key) {
@@ -64,6 +74,8 @@ class RedisCache extends BaseCache {
}
/**
* @param {!string} key
* @return {Promise}
* @override
*/
remove(key) {
cache/session.js
+ 12
- 0
  • View file @ 1e13d0c1

  • Edit in single-file editor

  • Open in Web IDE


const BaseCache = require('./base');
/**
* @implements {BaseCache}
*/
class SessionCache extends BaseCache {
/**
* @return {Promise}
* @override
*/
init() {
@@ -10,6 +14,7 @@ class SessionCache extends BaseCache {
}
/**
* @return {Promise}
* @override
*/
clear() {
@@ -18,6 +23,7 @@ class SessionCache extends BaseCache {
}
/**
* @return {Promise}
* @override
*/
close() {
@@ -26,6 +32,8 @@ class SessionCache extends BaseCache {
}
/**
* @param {!string} key
* @return {Promise}
* @override
*/
exists(key) {
@@ -33,6 +41,8 @@ class SessionCache extends BaseCache {
}
/**
* @param {!string} key
* @return {Promise}
* @override
*/
set(key) {
@@ -41,6 +51,8 @@ class SessionCache extends BaseCache {
}
/**
* @param {!string} key
* @return {Promise}
* @override
*/
remove(key) {
lib/crawler.js
+ 25
- 12
  • View file @ 1e13d0c1

  • Edit in single-file editor

  • Open in Web IDE


@@ -10,13 +10,17 @@ const GOTO_OPTIONS = [
const jQueryPath = require.resolve('jquery');
class Crawler {
/**
* @param {!Puppeteer.Page} page
* @param {!Object} options
*/
constructor(page, options) {
this._page = page;
this._options = options;
}
/**
* @return {Promise} resolved when crawling successfully ends
* @return {Promise}
*/
crawl() {
return this._prepare()
@@ -37,14 +41,14 @@ class Crawler {
}
/**
* @return {Promise} resolved when crawler is closed
* @return {Promise}
*/
close() {
return this._page.close();
}
/**
* @return {Promise} preparation completed
* @return {Promise}
* @private
*/
_prepare() {
@@ -59,7 +63,7 @@ class Crawler {
}
/**
* @return {Promise} resolved after preventing new tabs
* @return {Promise}
* @private
*/
_preventNewTabs() {
@@ -71,7 +75,7 @@ class Crawler {
}
/**
* @return {Promise} resolved after authentication
* @return {Promise}
* @private
*/
_authenticate() {
@@ -81,7 +85,7 @@ class Crawler {
}
/**
* @return {Promise} resolved after emulating devices
* @return {Promise}
* @private
*/
_emulate() {
@@ -90,7 +94,7 @@ class Crawler {
}
/**
* @return {Promise} resolved after setting user agent
* @return {Promise}
* @private
*/
_setUserAgent() {
@@ -99,7 +103,7 @@ class Crawler {
}
/**
* @return {Promise} resolved after setting extra headers
* @return {Promise}
* @private
*/
_setExtraHeaders() {
@@ -121,7 +125,7 @@ class Crawler {
/**
* @param {Puppeteer.Dialog} dialog
* @return {Promise} resolved after dialog is dismissed
* @return {Promise}
* @private
*/
_handleDialog(dialog) {
@@ -129,18 +133,26 @@ class Crawler {
return dialog.dismiss();
}
/**
* @return {Promise}
* @private
*/
_request() {
const gotoOptions = pick(this._options, GOTO_OPTIONS);
return this._page.goto(this._options.url, gotoOptions);
}
/**
* @return {Promise}
* @private
*/
_scrape() {
return this._addJQuery()
.then(() => this._page.evaluate(this._options.evaluatePage));
}
/**
* @return {Promise} resolved after adding jQuery
* @return {Promise}
* @private
*/
_addJQuery() {
@@ -149,7 +161,7 @@ class Crawler {
}
/**
* @return {Promise} resolved after screenshot is captured
* @return {Promise}
* @private
*/
_screenshot() {
@@ -158,7 +170,8 @@ class Crawler {
}
/**
* @return {Promise} resolved after collecting links
* @param {!string} baseUrl
* @return {Promise}
* @private
*/
_collectLinks(baseUrl) {
lib/hccrawler.js
+ 40
- 59
  • View file @ 1e13d0c1

  • Edit in single-file editor

  • Open in Web IDE


@@ -54,10 +54,8 @@ const deviceNames = Object.keys(devices);
class HCCrawler {
/**
* Connect to an existing Chromium instance
* @param {Object} options
* @return {Promise} resolved after successfully connecting a browser
* @static
* @param {Object=} options
* @return {Promise}
*/
static connect(options) {
return Puppeteer.connect(pick(options, PUPPETEER_CONNECT_OPTIONS))
@@ -66,10 +64,8 @@ class HCCrawler {
}
/**
* Launch a Chromium instance
* @param {Object} options
* @return {Promise} resolved after successfully launching a browser
* @static
* @param {Object=} options
* @return {Promise}
*/
static launch(options) {
return Puppeteer.launch(pick(options, PUPPETEER_LAUNCH_OPTIONS))
@@ -78,17 +74,15 @@ class HCCrawler {
}
/**
* A path where Puppeteer expects to find bundled Chromium.
* @return {String} executable path
* @static
* @return {string}
*/
static executablePath() {
return Puppeteer.executablePath();
}
/**
* @param {Puppeteer.Browser} browser
* @param {Object} options
* @param {!Puppeteer.Browser} browser
* @param {!Object} options
*/
constructor(browser, options) {
this._browser = browser;
@@ -112,7 +106,6 @@ class HCCrawler {
}
/**
* Queue requests
* @param {Object|Array|string} options
*/
queue(options) {
@@ -127,8 +120,7 @@ class HCCrawler {
}
/**
* Close the crawler
* @return {Promise} resolved when the crawler is closed
* @return {Promise}
*/
close() {
return Promise.all([
@@ -138,8 +130,7 @@ class HCCrawler {
}
/**
* Disconnect from the Chromium instance
* @return {Promise} resolved when the crawler disconnected
* @return {Promise}
*/
disconnect() {
return Promise.all([
@@ -149,50 +140,43 @@ class HCCrawler {
}
/**
* @return {Promise} resolved with HeadlessChrome/Chromium version
* @return {Promise}
*/
version() {
return this._browser.version();
}
/**
* @return {Promise} resolved with websocket url
* @return {Promise}
*/
wsEndpoint() {
return this._browser.wsEndpoint();
}
/**
* @return {Promise} resolved when queue is empty
* @return {Promise}
*/
onIdle() {
return this._pQueue.onIdle();
}
/**
* Set max request option after launch
* @param {!number} maxRequest
*/
setMaxRequest(maxRequest) {
this._options.maxRequest = maxRequest;
}
/**
* Pause request temporary
*/
pause() {
return this._pQueue.pause();
this._pQueue.pause();
}
/**
* Resume request temporary
*/
resume() {
return this._pQueue.start();
this._pQueue.start();
}
/**
* Clear cache
* @return {Promise} resolved when cache has been cleared
* @return {Promise}
*/
clearCache() {
if (!this._options.cache) return Promise.resolve();
@@ -200,43 +184,35 @@ class HCCrawler {
}
/**
* Get paused status
* @return {boolean} paused
* @readonly
* @return {boolean}
*/
get isPaused() {
return this._pQueue.isPaused();
}
/**
* Get the queue size
* @return {number} queue size
* @readonly
* @return {number}
*/
get queueSize() {
return this._pQueue.size;
}
/**
* Get the pending count
* @return {number} pending count
* @readonly
* @return {number}
*/
get pendingQueueSize() {
return this._pQueue.pending;
}
/**
* Get the requested count
* @return {number} requested count
* @readonly
* @return {number}
*/
get requestedCount() {
return this._requestedCount;
}
/**
* @return {Promise} resolved when initialization completed
* @return {Promise}
* @private
*/
_init() {
@@ -245,7 +221,7 @@ class HCCrawler {
}
/**
* @param {Object} options
* @param {!Object} options
* @private
*/
_validateOptions(options) {
@@ -257,7 +233,9 @@ class HCCrawler {
}
/**
* @param {Object} options
* @param {!Object} options
* @param {number=} depth
* @param {number=} retryCount
* @param {number} retryCount
* @private
*/
@@ -310,8 +288,9 @@ class HCCrawler {
}
/**
* @param {!Object} options
* @return {boolean}
* @private
* @return {boolean} whether target url is allowed
*/
_checkAllowedDomains(options) {
const { hostname } = URL.parse(options.url);
@@ -320,8 +299,8 @@ class HCCrawler {
}
/**
* @param {Object} options
* @return {Promise} whether the requested options already exists in the cache storage
* @param {!Object} options
* @return {Promise}
* @private
*/
_checkExists(options) {
@@ -332,8 +311,8 @@ class HCCrawler {
}
/**
* @param {Object} options
* @return {Promise} resolved when already accessed options are removed
* @param {!Object} options
* @return {Promise}
* @private
*/
_removeExists(options) {
@@ -343,9 +322,8 @@ class HCCrawler {
}
/**
* @param {Puppeteer.Page} page
* @param {Object} options
* @return {Promise} resolved whether request should be sent
* @param {!Object} options
* @return {Promise}
* @private
*/
_preRequest(options) {
@@ -354,8 +332,8 @@ class HCCrawler {
}
/**
* @param {Object} options
* @return {Promise} resolved when successfully opened a page
* @param {!Object} options
* @return {Promise}
* @private
*/
_newPage(options) {
@@ -364,6 +342,9 @@ class HCCrawler {
}
/**
* @param {!Array<!string>} links
* @param {!Object} options
* @param {number} depth
* @private
*/
_followLinks(links, options, depth) {
@@ -387,7 +368,7 @@ class HCCrawler {
}
/**
* @return {Promise} resolved when clear cache
* @return {Promise}
* @private
*/
_clearCacheOnEnd() {
@@ -396,7 +377,7 @@ class HCCrawler {
}
/**
* @return {Promise} resolved when cache has been closed
* @return {Promise}
* @private
*/
_closeCache() {
0 Assignees
None
Assign to
0 Reviewers
None
Request review from
Labels
0
None
0
None
    Assign labels
  • Manage project labels

Milestone
No milestone
None
None
Time tracking
No estimate or time spent
Lock merge request
Unlocked
0
0 participants
Reference:
Source branch: closure_jsdoc_style

Menu

Explore Projects Groups Snippets