From eb20ced7d1f167e7b655044e733251d8ccc4284b Mon Sep 17 00:00:00 2001 From: lensferno Date: Sun, 10 Jul 2022 08:03:00 +0800 Subject: [PATCH] =?UTF-8?q?=E5=B0=8F=E6=9B=B4=E4=B8=80=E4=B8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../net/lensfrex/oj/collector/Collector.java | 154 +++++++++--------- .../java/net/lensfrex/oj/collector/Main.java | 10 +- .../net/lensfrex/oj/collector/Record.java | 2 + .../oj/collector/utils/ParamFiller.java | 1 + 4 files changed, 83 insertions(+), 84 deletions(-) diff --git a/src/main/java/net/lensfrex/oj/collector/Collector.java b/src/main/java/net/lensfrex/oj/collector/Collector.java index b7dd22e..0a53975 100644 --- a/src/main/java/net/lensfrex/oj/collector/Collector.java +++ b/src/main/java/net/lensfrex/oj/collector/Collector.java @@ -5,13 +5,12 @@ import net.lensfrex.oj.collector.data.QuestionDetail; import net.lensfrex.oj.collector.utils.IOUtil; import net.lensfrex.oj.collector.utils.NetworkUtil; import net.lensfrex.oj.collector.utils.Random; -import org.apache.http.client.utils.URIBuilder; import java.io.IOException; -import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; +import java.util.Map; public class Collector { @@ -21,52 +20,13 @@ public class Collector { private static final String INDEX_REQUEST_BODY = IOUtil.inputStreamToString(Record.class.getResourceAsStream("/index_request_body.txt"), StandardCharsets.UTF_8); + // leetcode每次最大只能获取100条。就算设置1000每次也只能获取到100条 private static final int PAGE_LIMIT = 100; - private String generatePageRequestUrl(String baseUrl, int offset, int limit, int page) throws URISyntaxException { - URIBuilder uriBuilder = new URIBuilder(baseUrl); - uriBuilder.addParameter("paging", "true"); - uriBuilder.addParameter("offset", String.valueOf(offset)); - uriBuilder.addParameter("limit", String.valueOf(limit)); - uriBuilder.addParameter("page", String.valueOf(page)); - - return uriBuilder.toString(); - } - - private HashMap getHeaders() { - HashMap headers = new HashMap<>(); - - headers.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.66 Safari/537.36 Edg/103.0.1264.44"); - headers.put("Host", "leetcode.cn"); - headers.put("Origin", "https://leetcode.cn"); - headers.put("Accept-Encoding", "gzip, deflate, br"); - headers.put("Content-Type", "application/json"); - - return headers; - } - - // -// private QuestionDetail[] getResults(String url, Map headers) throws IOException { -// String json = NetworkUtil.get(url, headers); -// -// JsonArray resultJsonArray = JsonParser.parseString(json).getAsJsonObject() -// .getAsJsonObject("data") -// .getAsJsonArray("results"); -// -// return new Gson().fromJson(resultJsonArray, QuestionDetail[].class); -// } - - private final Record record = new Record(); - private static final Random random = new Random(); - - // 起始位置 - private static final int START_AT = 0; - public ArrayList collectAllQuestion() { ArrayList QuestionDetails = new ArrayList<>(); try { - HashMap headers = getHeaders(); - + // 先拿一条记录看看有多少题目 String json = NetworkUtil.post(API_BASE, String.format(INDEX_REQUEST_BODY, 0, 1), headers); int totalResult = JsonParser.parseString(json).getAsJsonObject() @@ -76,55 +36,27 @@ public class Collector { int page = (totalResult / PAGE_LIMIT) + (totalResult % PAGE_LIMIT == 0 ? 0 : 1); - ArrayList questionNames = new ArrayList<>(totalResult); - System.out.println("Total results: " + totalResult); System.out.println("Page Limit: " + PAGE_LIMIT); System.out.println("Total pages: " + page); // Fetch all question list - for (int i = 1; i <= page; i++) { - int offset = (i - 1) * PAGE_LIMIT; - System.out.println("\n------------------------------------------------------"); - System.out.println("Getting page " + i); - System.out.println("Offset " + offset); - - json = NetworkUtil.post(API_BASE, String.format(INDEX_REQUEST_BODY, START_AT + offset, PAGE_LIMIT), headers); - - JsonArray questionJsonArray = - JsonParser.parseString(json).getAsJsonObject() - .getAsJsonObject("data") - .getAsJsonObject("problemsetQuestionList") - .getAsJsonArray("questions"); - - for (JsonElement jsonElement : questionJsonArray) { - questionNames.add(jsonElement.getAsJsonObject().get("titleSlug").getAsString()); - } - - System.out.println("Got result " + questionJsonArray.size()); - } + ArrayList questionNames = this.fetchQuestionList(page); System.out.println("Total result: " + questionNames.size()); // Fetch all question details - QuestionDetail detail = null; - for (String questionTitleSlug : questionNames) { - System.out.println("Getting question: " + questionTitleSlug); - json = NetworkUtil.post(API_BASE, String.format(DETAILS_REQUEST_BODY, questionTitleSlug), headers); - JsonObject question = - JsonParser.parseString(json).getAsJsonObject() - .getAsJsonObject("data") - .getAsJsonObject("question"); + for (String questionTitleSlug : questionNames) { + QuestionDetail detail = fetchQuestionDetail(questionTitleSlug); - detail = new Gson().fromJson(question, QuestionDetail.class); record.writeToFile( "D:\\ojs-leetcode", String.valueOf(detail.getId()), detail.getChineseTitle() == null ? detail.getTitle() : detail.getChineseTitle(), record.fillText(detail)); - Thread.sleep(random.getRandomNumber(120)); + Thread.sleep(random.getRandomNumber(256)); } } catch (IOException | InterruptedException e) { @@ -132,6 +64,78 @@ public class Collector { } System.out.println("Finish."); + return QuestionDetails; } +// +// private String generatePageRequestUrl(String baseUrl, int offset, int limit, int page) throws URISyntaxException { +// URIBuilder uriBuilder = new URIBuilder(baseUrl); +// uriBuilder.addParameter("paging", "true"); +// uriBuilder.addParameter("offset", String.valueOf(offset)); +// uriBuilder.addParameter("limit", String.valueOf(limit)); +// uriBuilder.addParameter("page", String.valueOf(page)); +// +// return uriBuilder.toString(); +// } + + private final Record record = new Record(); + private static final Random random = new Random(); + + private static HashMap setHeaders() { + HashMap headers = new HashMap<>(); + + headers.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.66 Safari/537.36 Edg/103.0.1264.44"); + headers.put("Host", "leetcode.cn"); + headers.put("Origin", "https://leetcode.cn"); + headers.put("Accept-Encoding", "gzip, deflate, br"); + headers.put("Content-Type", "application/json"); + + return headers; + } + + private static final Map headers = setHeaders(); + + // 起始位置 + private static final int START_AT = 0; + + private ArrayList fetchQuestionList(int page) throws IOException { + ArrayList questions = new ArrayList<>(page * PAGE_LIMIT); + String json = ""; + for (int i = 1; i <= page; i++) { + int offset = (i - 1) * PAGE_LIMIT; + System.out.println("\n------------------------------------------------------"); + System.out.println("Getting page " + i); + System.out.println("Offset " + offset); + + json = NetworkUtil.post(API_BASE, String.format(INDEX_REQUEST_BODY, START_AT + offset, PAGE_LIMIT), headers); + + JsonArray questionJsonArray = + JsonParser.parseString(json).getAsJsonObject() + .getAsJsonObject("data") + .getAsJsonObject("problemsetQuestionList") + .getAsJsonArray("questions"); + + for (JsonElement jsonElement : questionJsonArray) { + questions.add(jsonElement.getAsJsonObject().get("titleSlug").getAsString()); + } + + System.out.println("Got result " + questionJsonArray.size()); + } + + return questions; + } + + private QuestionDetail fetchQuestionDetail(String name) throws IOException { + System.out.println("\n------------------------------------------------------"); + System.out.println("Getting question: " + name); + + String json = NetworkUtil.post(API_BASE, String.format(DETAILS_REQUEST_BODY, name), headers); + + JsonObject question = + JsonParser.parseString(json).getAsJsonObject() + .getAsJsonObject("data") + .getAsJsonObject("question"); + + return new Gson().fromJson(question, QuestionDetail.class); + } } \ No newline at end of file diff --git a/src/main/java/net/lensfrex/oj/collector/Main.java b/src/main/java/net/lensfrex/oj/collector/Main.java index 21f8ffe..28d8ea6 100644 --- a/src/main/java/net/lensfrex/oj/collector/Main.java +++ b/src/main/java/net/lensfrex/oj/collector/Main.java @@ -24,14 +24,6 @@ public class Main { private static final String location = "D:\\ojs"; private void run() throws URISyntaxException, IOException { - Record record = new Record(); - ArrayList questionResults = new Collector().collectAllQuestion(); -// -// String context = ""; -// for (QuestionDetail questionResult : questionResults) { -// context = record.fillText(questionResult); -// -// record.writeToFile(location, String.valueOf(questionResult.getId()), questionResult.getChineseTitle(), context); -// } + new Collector().collectAllQuestion(); } } diff --git a/src/main/java/net/lensfrex/oj/collector/Record.java b/src/main/java/net/lensfrex/oj/collector/Record.java index 054b056..388aa23 100644 --- a/src/main/java/net/lensfrex/oj/collector/Record.java +++ b/src/main/java/net/lensfrex/oj/collector/Record.java @@ -60,6 +60,8 @@ public class Record { title = filenameFilter(title); File file = new File(String.format("%s/%s - %s.md", location, id, title)); + file.getParentFile().mkdirs(); + System.out.println("Will write to: " + file.getPath()); try { IOUtil.writeFile(context.getBytes(StandardCharsets.UTF_8), file); diff --git a/src/main/java/net/lensfrex/oj/collector/utils/ParamFiller.java b/src/main/java/net/lensfrex/oj/collector/utils/ParamFiller.java index 6ba7017..b732ccf 100644 --- a/src/main/java/net/lensfrex/oj/collector/utils/ParamFiller.java +++ b/src/main/java/net/lensfrex/oj/collector/utils/ParamFiller.java @@ -20,6 +20,7 @@ public class ParamFiller { if (value != null) { // 直接正则替换遇到$会报错 + // 后面会改一改 value = value.replace("$", "$"); } matcher.appendReplacement(stringBuffer, value == null ? "" : value);