|
|
@ -5,13 +5,12 @@ import net.lensfrex.oj.collector.data.QuestionDetail; |
|
|
|
import net.lensfrex.oj.collector.utils.IOUtil; |
|
|
|
import net.lensfrex.oj.collector.utils.IOUtil; |
|
|
|
import net.lensfrex.oj.collector.utils.NetworkUtil; |
|
|
|
import net.lensfrex.oj.collector.utils.NetworkUtil; |
|
|
|
import net.lensfrex.oj.collector.utils.Random; |
|
|
|
import net.lensfrex.oj.collector.utils.Random; |
|
|
|
import org.apache.http.client.utils.URIBuilder; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import java.io.IOException; |
|
|
|
import java.io.IOException; |
|
|
|
import java.net.URISyntaxException; |
|
|
|
|
|
|
|
import java.nio.charset.StandardCharsets; |
|
|
|
import java.nio.charset.StandardCharsets; |
|
|
|
import java.util.ArrayList; |
|
|
|
import java.util.ArrayList; |
|
|
|
import java.util.HashMap; |
|
|
|
import java.util.HashMap; |
|
|
|
|
|
|
|
import java.util.Map; |
|
|
|
|
|
|
|
|
|
|
|
public class Collector { |
|
|
|
public class Collector { |
|
|
|
|
|
|
|
|
|
|
@ -21,52 +20,13 @@ public class Collector { |
|
|
|
|
|
|
|
|
|
|
|
private static final String INDEX_REQUEST_BODY = IOUtil.inputStreamToString(Record.class.getResourceAsStream("/index_request_body.txt"), StandardCharsets.UTF_8); |
|
|
|
private static final String INDEX_REQUEST_BODY = IOUtil.inputStreamToString(Record.class.getResourceAsStream("/index_request_body.txt"), StandardCharsets.UTF_8); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// LeetCode returns at most 100 entries per request; even with the limit set to 1000, only 100 come back.
|
|
|
|
private static final int PAGE_LIMIT = 100; |
|
|
|
private static final int PAGE_LIMIT = 100; |
|
|
|
|
|
|
|
|
|
|
|
private String generatePageRequestUrl(String baseUrl, int offset, int limit, int page) throws URISyntaxException { |
|
|
|
|
|
|
|
URIBuilder uriBuilder = new URIBuilder(baseUrl); |
|
|
|
|
|
|
|
uriBuilder.addParameter("paging", "true"); |
|
|
|
|
|
|
|
uriBuilder.addParameter("offset", String.valueOf(offset)); |
|
|
|
|
|
|
|
uriBuilder.addParameter("limit", String.valueOf(limit)); |
|
|
|
|
|
|
|
uriBuilder.addParameter("page", String.valueOf(page)); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return uriBuilder.toString(); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private HashMap<String, String> getHeaders() { |
|
|
|
|
|
|
|
HashMap<String, String> headers = new HashMap<>(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
headers.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.66 Safari/537.36 Edg/103.0.1264.44"); |
|
|
|
|
|
|
|
headers.put("Host", "leetcode.cn"); |
|
|
|
|
|
|
|
headers.put("Origin", "https://leetcode.cn"); |
|
|
|
|
|
|
|
headers.put("Accept-Encoding", "gzip, deflate, br"); |
|
|
|
|
|
|
|
headers.put("Content-Type", "application/json"); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return headers; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
|
|
|
|
// private QuestionDetail[] getResults(String url, Map<String, String> headers) throws IOException {
|
|
|
|
|
|
|
|
// String json = NetworkUtil.get(url, headers);
|
|
|
|
|
|
|
|
//
|
|
|
|
|
|
|
|
// JsonArray resultJsonArray = JsonParser.parseString(json).getAsJsonObject()
|
|
|
|
|
|
|
|
// .getAsJsonObject("data")
|
|
|
|
|
|
|
|
// .getAsJsonArray("results");
|
|
|
|
|
|
|
|
//
|
|
|
|
|
|
|
|
// return new Gson().fromJson(resultJsonArray, QuestionDetail[].class);
|
|
|
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private final Record record = new Record(); |
|
|
|
|
|
|
|
private static final Random random = new Random(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Starting position
|
|
|
|
|
|
|
|
private static final int START_AT = 0; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public ArrayList<QuestionDetail> collectAllQuestion() { |
|
|
|
public ArrayList<QuestionDetail> collectAllQuestion() { |
|
|
|
ArrayList<QuestionDetail> QuestionDetails = new ArrayList<>(); |
|
|
|
ArrayList<QuestionDetail> QuestionDetails = new ArrayList<>(); |
|
|
|
try { |
|
|
|
try { |
|
|
|
HashMap<String, String> headers = getHeaders(); |
|
|
|
// Fetch a single record first to find out how many questions there are
|
|
|
|
|
|
|
|
|
|
|
|
String json = NetworkUtil.post(API_BASE, String.format(INDEX_REQUEST_BODY, 0, 1), headers); |
|
|
|
String json = NetworkUtil.post(API_BASE, String.format(INDEX_REQUEST_BODY, 0, 1), headers); |
|
|
|
|
|
|
|
|
|
|
|
int totalResult = JsonParser.parseString(json).getAsJsonObject() |
|
|
|
int totalResult = JsonParser.parseString(json).getAsJsonObject() |
|
|
@ -76,55 +36,27 @@ public class Collector { |
|
|
|
|
|
|
|
|
|
|
|
int page = (totalResult / PAGE_LIMIT) + (totalResult % PAGE_LIMIT == 0 ? 0 : 1); |
|
|
|
int page = (totalResult / PAGE_LIMIT) + (totalResult % PAGE_LIMIT == 0 ? 0 : 1); |
|
|
|
|
|
|
|
|
|
|
|
ArrayList<String> questionNames = new ArrayList<>(totalResult); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
System.out.println("Total results: " + totalResult); |
|
|
|
System.out.println("Total results: " + totalResult); |
|
|
|
System.out.println("Page Limit: " + PAGE_LIMIT); |
|
|
|
System.out.println("Page Limit: " + PAGE_LIMIT); |
|
|
|
System.out.println("Total pages: " + page); |
|
|
|
System.out.println("Total pages: " + page); |
|
|
|
|
|
|
|
|
|
|
|
// Fetch all question list
|
|
|
|
// Fetch all question list
|
|
|
|
for (int i = 1; i <= page; i++) { |
|
|
|
ArrayList<String> questionNames = this.fetchQuestionList(page); |
|
|
|
int offset = (i - 1) * PAGE_LIMIT; |
|
|
|
|
|
|
|
System.out.println("\n------------------------------------------------------"); |
|
|
|
|
|
|
|
System.out.println("Getting page " + i); |
|
|
|
|
|
|
|
System.out.println("Offset " + offset); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
json = NetworkUtil.post(API_BASE, String.format(INDEX_REQUEST_BODY, START_AT + offset, PAGE_LIMIT), headers); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
JsonArray questionJsonArray = |
|
|
|
|
|
|
|
JsonParser.parseString(json).getAsJsonObject() |
|
|
|
|
|
|
|
.getAsJsonObject("data") |
|
|
|
|
|
|
|
.getAsJsonObject("problemsetQuestionList") |
|
|
|
|
|
|
|
.getAsJsonArray("questions"); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (JsonElement jsonElement : questionJsonArray) { |
|
|
|
|
|
|
|
questionNames.add(jsonElement.getAsJsonObject().get("titleSlug").getAsString()); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
System.out.println("Got result " + questionJsonArray.size()); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
System.out.println("Total result: " + questionNames.size()); |
|
|
|
System.out.println("Total result: " + questionNames.size()); |
|
|
|
|
|
|
|
|
|
|
|
// Fetch all question details
|
|
|
|
// Fetch all question details
|
|
|
|
QuestionDetail detail = null; |
|
|
|
|
|
|
|
for (String questionTitleSlug : questionNames) { |
|
|
|
|
|
|
|
System.out.println("Getting question: " + questionTitleSlug); |
|
|
|
|
|
|
|
json = NetworkUtil.post(API_BASE, String.format(DETAILS_REQUEST_BODY, questionTitleSlug), headers); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
JsonObject question = |
|
|
|
for (String questionTitleSlug : questionNames) { |
|
|
|
JsonParser.parseString(json).getAsJsonObject() |
|
|
|
QuestionDetail detail = fetchQuestionDetail(questionTitleSlug); |
|
|
|
.getAsJsonObject("data") |
|
|
|
|
|
|
|
.getAsJsonObject("question"); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
detail = new Gson().fromJson(question, QuestionDetail.class); |
|
|
|
|
|
|
|
record.writeToFile( |
|
|
|
record.writeToFile( |
|
|
|
"D:\\ojs-leetcode", |
|
|
|
"D:\\ojs-leetcode", |
|
|
|
String.valueOf(detail.getId()), |
|
|
|
String.valueOf(detail.getId()), |
|
|
|
detail.getChineseTitle() == null ? detail.getTitle() : detail.getChineseTitle(), |
|
|
|
detail.getChineseTitle() == null ? detail.getTitle() : detail.getChineseTitle(), |
|
|
|
record.fillText(detail)); |
|
|
|
record.fillText(detail)); |
|
|
|
|
|
|
|
|
|
|
|
Thread.sleep(random.getRandomNumber(120)); |
|
|
|
Thread.sleep(random.getRandomNumber(256)); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
} catch (IOException | InterruptedException e) { |
|
|
|
} catch (IOException | InterruptedException e) { |
|
|
@ -132,6 +64,78 @@ public class Collector { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
System.out.println("Finish."); |
|
|
|
System.out.println("Finish."); |
|
|
|
|
|
|
|
|
|
|
|
return QuestionDetails; |
|
|
|
return QuestionDetails; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
//
|
|
|
|
|
|
|
|
// private String generatePageRequestUrl(String baseUrl, int offset, int limit, int page) throws URISyntaxException {
|
|
|
|
|
|
|
|
// URIBuilder uriBuilder = new URIBuilder(baseUrl);
|
|
|
|
|
|
|
|
// uriBuilder.addParameter("paging", "true");
|
|
|
|
|
|
|
|
// uriBuilder.addParameter("offset", String.valueOf(offset));
|
|
|
|
|
|
|
|
// uriBuilder.addParameter("limit", String.valueOf(limit));
|
|
|
|
|
|
|
|
// uriBuilder.addParameter("page", String.valueOf(page));
|
|
|
|
|
|
|
|
//
|
|
|
|
|
|
|
|
// return uriBuilder.toString();
|
|
|
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private final Record record = new Record(); |
|
|
|
|
|
|
|
private static final Random random = new Random(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private static HashMap<String, String> setHeaders() { |
|
|
|
|
|
|
|
HashMap<String, String> headers = new HashMap<>(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
headers.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.66 Safari/537.36 Edg/103.0.1264.44"); |
|
|
|
|
|
|
|
headers.put("Host", "leetcode.cn"); |
|
|
|
|
|
|
|
headers.put("Origin", "https://leetcode.cn"); |
|
|
|
|
|
|
|
headers.put("Accept-Encoding", "gzip, deflate, br"); |
|
|
|
|
|
|
|
headers.put("Content-Type", "application/json"); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return headers; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Request headers produced once by setHeaders() and passed to every NetworkUtil.post call
// made by fetchQuestionList and fetchQuestionDetail.
private static final Map<String, String> headers = setHeaders(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Starting position
|
|
|
|
|
|
|
|
private static final int START_AT = 0; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private ArrayList<String> fetchQuestionList(int page) throws IOException { |
|
|
|
|
|
|
|
ArrayList<String> questions = new ArrayList<>(page * PAGE_LIMIT); |
|
|
|
|
|
|
|
String json = ""; |
|
|
|
|
|
|
|
for (int i = 1; i <= page; i++) { |
|
|
|
|
|
|
|
int offset = (i - 1) * PAGE_LIMIT; |
|
|
|
|
|
|
|
System.out.println("\n------------------------------------------------------"); |
|
|
|
|
|
|
|
System.out.println("Getting page " + i); |
|
|
|
|
|
|
|
System.out.println("Offset " + offset); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
json = NetworkUtil.post(API_BASE, String.format(INDEX_REQUEST_BODY, START_AT + offset, PAGE_LIMIT), headers); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
JsonArray questionJsonArray = |
|
|
|
|
|
|
|
JsonParser.parseString(json).getAsJsonObject() |
|
|
|
|
|
|
|
.getAsJsonObject("data") |
|
|
|
|
|
|
|
.getAsJsonObject("problemsetQuestionList") |
|
|
|
|
|
|
|
.getAsJsonArray("questions"); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (JsonElement jsonElement : questionJsonArray) { |
|
|
|
|
|
|
|
questions.add(jsonElement.getAsJsonObject().get("titleSlug").getAsString()); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
System.out.println("Got result " + questionJsonArray.size()); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return questions; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private QuestionDetail fetchQuestionDetail(String name) throws IOException { |
|
|
|
|
|
|
|
System.out.println("\n------------------------------------------------------"); |
|
|
|
|
|
|
|
System.out.println("Getting question: " + name); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
String json = NetworkUtil.post(API_BASE, String.format(DETAILS_REQUEST_BODY, name), headers); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
JsonObject question = |
|
|
|
|
|
|
|
JsonParser.parseString(json).getAsJsonObject() |
|
|
|
|
|
|
|
.getAsJsonObject("data") |
|
|
|
|
|
|
|
.getAsJsonObject("question"); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return new Gson().fromJson(question, QuestionDetail.class); |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} |