webrecorder
diff --git a/‎.eslintrc.cjs
+5-5 b/‎.eslintrc.cjs
+5-5
diff --git a/‎.github/workflows/ci.yaml
+23-30 b/‎.github/workflows/ci.yaml
+23-30
diff --git a/‎.github/workflows/release.yaml
+7-15 b/‎.github/workflows/release.yaml
+7-15
diff --git a/‎.gitignore
+1 b/‎.gitignore
+1
diff --git a/‎.husky/pre-commit
+1-1 b/‎.husky/pre-commit
+1-1
diff --git a/‎.pre-commit-config.yaml
+7-7 b/‎.pre-commit-config.yaml
+7-7
diff --git a/‎.prettierignore
+1 b/‎.prettierignore
+1
diff --git a/‎CHANGES.md
+13-5 b/‎CHANGES.md
+13-5
@@ -5,18 +5,18 @@ module.exports = {
     node: true,
     jest: true,
   },
-  extends: ["eslint:recommended", "plugin:@typescript-eslint/recommended"],
+  extends: [
+    "eslint:recommended",
+    "plugin:@typescript-eslint/recommended",
+    "prettier",
+  ],
   parser: "@typescript-eslint/parser",
   plugins: ["@typescript-eslint"],
   parserOptions: {
     ecmaVersion: 12,
     sourceType: "module",
   },
   rules: {
-    indent: ["error", 2],
-    "linebreak-style": ["error", "unix"],
-    quotes: ["error", "double"],
-    semi: ["error", "always"],
     "no-constant-condition": ["error", { checkLoops: false }],
     "no-use-before-define": [
       "error",
 
@@ -6,48 +6,41 @@ on:
 
 jobs:
   lint:
-
     runs-on: ubuntu-latest
 
     strategy:
       matrix:
         node-version: [18.x]
 
     steps:
-    - uses: actions/checkout@v3
-    - name: Use Node.js ${{ matrix.node-version }}
-      uses: actions/setup-node@v3
-      with:
-        node-version: ${{ matrix.node-version }}
-    - name: install requirements
-      run: yarn install
-    - name: run linter
-      run: yarn lint
-  
-  build:
+      - uses: actions/checkout@v3
+      - name: Use Node.js ${{ matrix.node-version }}
+        uses: actions/setup-node@v3
+        with:
+          node-version: ${{ matrix.node-version }}
+      - name: install requirements
+        run: yarn install
+      - name: run linter
+        run: yarn lint && yarn format
 
+  build:
     runs-on: ubuntu-latest
 
     strategy:
       matrix:
         node-version: [18.x]
 
     steps:
-    - uses: actions/checkout@v3
-    - name: Use Node.js ${{ matrix.node-version }}
-      uses: actions/setup-node@v3
-      with:
-        node-version: ${{ matrix.node-version }}
-    - name: install requirements
-      run: yarn install
-    - name: build js
-      run: yarn run tsc
-    - name: build docker
-      run: docker-compose build
-    - name: run jest
-      run: sudo yarn test
-        
-
-
-
-
+      - uses: actions/checkout@v3
+      - name: Use Node.js ${{ matrix.node-version }}
+        uses: actions/setup-node@v3
+        with:
+          node-version: ${{ matrix.node-version }}
+      - name: install requirements
+        run: yarn install
+      - name: build js
+        run: yarn run tsc
+      - name: build docker
+        run: docker-compose build
+      - name: run jest
+        run: sudo yarn test
@@ -8,44 +8,36 @@ jobs:
     name: Build x86 and ARM Images and push to Dockerhub
     runs-on: ubuntu-22.04
     steps:
-      - 
-        name: Check out the repo
+      - name: Check out the repo
         uses: actions/checkout@v4
 
-      -
-        name: Docker image metadata
+      - name: Docker image metadata
         id: meta
         uses: docker/metadata-action@v5
         with:
           images: webrecorder/browsertrix-crawler
           tags: |
             type=semver,pattern={{version}}
 
-      -
-        name: Set up QEMU
+      - name: Set up QEMU
         uses: docker/setup-qemu-action@v3
         with:
           platforms: arm64
 
-      -
-        name: Set up Docker Buildx
+      - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v1
-      -
-        name: Login to DockerHub
+      - name: Login to DockerHub
         uses: docker/login-action@v3
         with:
           username: ${{ secrets.DOCKER_USERNAME }}
           password: ${{ secrets.DOCKER_PASSWORD }}
-      -
-        name: Build and push
+      - name: Build and push
         id: docker_build
         uses: docker/build-push-action@v3
         with:
           context: .
           push: true
           tags: ${{ steps.meta.outputs.tags }}
           platforms: "linux/amd64,linux/arm64"
-      -
-        name: Image digest
+      - name: Image digest
         run: echo ${{ steps.docker_build.outputs.digest }}
-
@@ -6,3 +6,4 @@ node_modules/
 crawls/
 test-crawls/
 .DS_Store
+dist
@@ -1,4 +1,4 @@
 #!/usr/bin/env sh
 . "$(dirname -- "$0")/_/husky.sh"
 
-yarn lint
+yarn lint:fix
@@ -1,8 +1,8 @@
 repos:
-- repo: local
-  hooks:
-    - id: husky-run-pre-commit
-      name: husky
-      language: system
-      entry: .husky/pre-commit
-      pass_filenames: false
+  - repo: local
+    hooks:
+      - id: husky-run-pre-commit
+        name: husky
+        language: system
+        entry: .husky/pre-commit
+        pass_filenames: false
@@ -0,0 +1 @@
+dist
@@ -1,11 +1,13 @@
 ## CHANGES
 
 v0.8.1
+
 - Logging and Behavior Tweaks by @ikreymer in https://github.com/webrecorder/browsertrix-crawler/pull/229
 - Fix typos by @stavares843 in https://github.com/webrecorder/browsertrix-crawler/pull/232
 - Add crawl log to WACZ by @ikreymer in https://github.com/webrecorder/browsertrix-crawler/pull/231
 
 v0.8.0
+
 - Switch to Chrome/Chromium 109
 - Convert to ESM module
 - Add ad blocking via request interception (#173)
@@ -25,11 +27,13 @@ v0.8.0
 - update behaviors to 0.4.1, rename 'Behavior line' -> 'Behavior log' by @ikreymer in https://github.com/webrecorder/browsertrix-crawler/pull/223
 
 v0.7.1
+
 - Fix for warcio.js by @ikreymer in #178
 - Guard against pre-existing user/group by @edsu in #176
 - Fix incorrect combineWARCs property in README.md by @Georift in #180
 
 v0.7.0
+
 - Update to Chrome/Chromium 101 - (0.7.0 Beta 0) by @ikreymer in #144
 - Add --netIdleWait, bump dependencies (0.7.0-beta.2) by @ikreymer in #145
 - Update README.md by @atomotic in #147
@@ -41,7 +45,6 @@ v0.7.0
 - Interrupt Handling Fixes by @ikreymer in #167
 - Run in Docker as User by @edsu in #171
 
-
 v0.6.0
 
 - Add a --waitOnDone option, which has browsertrix crawler wait when finished (for use with Browsertrix Cloud)
@@ -56,8 +59,8 @@ v0.6.0
 - Fixes to interrupting a single instance in a shared state crawl
 - force all cookies, including session cookies, to fixed duration in days, configurable via --cookieDays
 
-
 v0.5.0
+
 - Scope: support for `scopeType: domain` to include all subdomains and ignoring 'www.' if specified in the seed.
 - Profiles: support loading remote profile from URL as well as local file
 - Non-HTML Pages: Load non-200 responses in browser, even if non-html, fix waiting issues with non-HTML pages (eg. PDFs)
@@ -75,8 +78,8 @@ v0.5.0
 - Signing: Support for optional signing of WACZ
 - Dependencies: update to latest pywb, wacz and browsertrix-behaviors packages
 
-
 v0.4.4
+
 - Page Block Rules Fix: 'request already handled' errors by avoiding adding duplicate handlers to same page.
 - Page Block Rules Fix: await all continue/abort() calls and catch errors.
 - Page Block Rules: Don't apply to top-level page, print warning and recommend scope rules instead.
@@ -86,18 +89,21 @@ v0.4.4
 - README: Update old type -> scopeType, list new scope types.
 
 v0.4.3
+
 - BlockRules Fixes: When considering the 'inFrameUrl' for a navigation request for an iframe, use URL of parent frame.
 - BlockRules Fixes: Always allow pywb proxy scripts.
 - Logging: Improved debug logging for block rules (log blocked requests and conditional iframe requests) when 'debug' set in 'logging'
 
 v0.4.2
+
 - Compose/docs: Build latest image by default, update README to refer to latest image
 - Fix typo in `crawler.capturePrefix` that resulted in `directFetchCapture()` always failing
 - Tests: Update all tests to use `test-crawls` directory
 - extractLinks() just extracts links from default selectors, allows custom driver to filter results
 - loadPage() accepts a list of selector options with selector, extract, and isAttribute settings for further customization of link extraction
 
 v0.4.1
+
 - BlockRules Optimizations: don't intercept requests if no blockRules
 - Profile Creation: Support extending existing profile by passing a --profile param to load on startup
 - Profile Creation: Set default window size to 1600x900, add --windowSize param for setting custom size
@@ -107,6 +113,7 @@ v0.4.1
 - CI: Build a multi-platform (amd64 and arm64) image on each release
 
 v0.4.0
+
 - YAML based config, specifyable via --config property or via stdin (with '--config stdin')
 - Support for different scope types ('page', 'prefix', 'host', 'any', 'none') + crawl depth at crawl level
 - Per-Seed scoping, including different scope types, or depth and include/exclude rules configurable per seed in 'seeds' list via YAML config
@@ -120,16 +127,17 @@ v0.4.0
 - Update to latest pywb (2.5.0b4), browsertrix-behaviors (0.2.3), py-wacz (0.3.1)
 
 v0.3.2
-- Added a `--urlFile` option: Allows users to specify a .txt file list of exact URLs to crawl (one URL per line).
 
+- Added a `--urlFile` option: Allows users to specify a .txt file list of exact URLs to crawl (one URL per line).
 
 v0.3.1
+
 - Improved shutdown wait: Instead of waiting for 5 secs, wait until all pending requests are written to WARCs
 - Bug fix: Use async APIs for combine WARC to avoid spurious issues with multiple crawls
 - Behaviors Update to Behaviors to 0.2.1, with support for facebook pages
 
-
 v0.3.0
+
 - WARC Combining: `--combineWARC` and `--rolloverSize` flags for generating combined WARC at end of crawl, each WARC upto specified rolloverSize
 - Profiles: Support for creating reusable browser profiles, stored as tarballs, and running crawl with a login profile (see README for more info)
 - Behaviors: Switch to Browsertrix Behaviors v0.1.1 for in-page behaviors