Skip to content

Commit

Permalink
Update to crawl CC-MAIN-2022-49
Browse files Browse the repository at this point in the history
  • Loading branch information
centic9 committed Jan 9, 2023
1 parent 4f1bd9a commit 5cd4a00
Showing 1 changed file with 1 addition and 1 deletion.
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,7 +29,7 @@ public class DownloadURLIndex {
private static final Logger log = LoggerFactory.make();

// https://commoncrawl.org/connect/blog/
- public static final String CURRENT_CRAWL = "CC-MAIN-2022-33";
+ public static final String CURRENT_CRAWL = "CC-MAIN-2022-49";
public static final File COMMON_CRAWL_FILE = new File("commoncrawl-" + CURRENT_CRAWL + ".txt");

private static final int START_INDEX = 0;
Expand Down

0 comments on commit 5cd4a00

Please sign in to comment.