修正课表解析时遇到空字段产生异常无法继续解析的问题；更改bkjx的cookie验证网址

3 years ago · f44615c179
parent bbb3f98010
commit f44615c179
4 changed files with 36 additions and 34 deletions
--- a/mywust-core/src/main/java/cn/linghang/mywust/core/api/Bkjx.java
+++ b/mywust-core/src/main/java/cn/linghang/mywust/core/api/Bkjx.java
@ -10,7 +10,7 @@ import lombok.Getter;
 public class Bkjx {
    public static final String BKJX_SESSION_COOKIE_API = "http://bkjx.wust.edu.cn/jsxsd/sso.jsp?ticket=%s";

-    public static final String BKJX_TEST_API = "http://bkjx.wust.edu.cn/jsxsd/xxwcqk/xxwcqk_idxOnzh.do";
+    public static final String BKJX_TEST_API = "http://bkjx.wust.edu.cn/jsxsd/framework/blankPage.jsp";

    public static final String BKJX_STUDENT_INFO_API = "http://bkjx.wust.edu.cn/jsxsd/grxx/xsxx";

--- a/mywust-core/src/main/java/cn/linghang/mywust/core/parser/undergraduate/CourseTableParser.java
+++ b/mywust-core/src/main/java/cn/linghang/mywust/core/parser/undergraduate/CourseTableParser.java
@ -18,7 +18,9 @@ import java.util.regex.Pattern;
 public class CourseTableParser implements Parser<List<Course>> {
    private static final Logger log = LoggerFactory.getLogger(CourseTableParser.class);

-    private static final String COURSE_SPLIT_STR = " --------------------- ";
+    private static final String COURSE_SPLIT_STR = "---------------------";
+
+    private static final String COURSE_SPLIT_TAG_STR = "</div><div>";

    private static final Pattern WEEK_RANGE_REGEX = Pattern.compile("(?<startWeek>\\d+)-(?<endWeek>\\d+)\\(周\\)");

@ -38,48 +40,39 @@ public class CourseTableParser implements Parser<List<Course>> {

            List<Course> courses = new ArrayList<>(girds.size());

-            // 解析每个格子
            int girdCount = 0;
            for (Element gird : girds) {
                girdCount++;

-                // 直接获取格子里所有课程的关键字段，每个下表对应格子里相应的课程
-                Elements classElements = gird.getElementsByAttributeValue("title", "课堂名称");
-                Elements teacherElements = gird.getElementsByAttributeValue("title", "老师");
-                Elements timeElements = gird.getElementsByAttributeValue("title", "周次(节次)");
-                Elements classroomElements = gird.getElementsByAttributeValue("title", "教室");
-
-                // 如果一个格子有好几节课，用" --------------------- "切开，提取字段的时候用i指定对应字段的下表
-                String[] courseNames = gird.ownText().split(COURSE_SPLIT_STR);
-                // 解析格子里的所有课
-                for (int i = 0; i < courseNames.length; i++) {
+                // 将分隔符替换成标签，方便解析
+                String girdHtml = gird.outerHtml().replace(COURSE_SPLIT_STR, COURSE_SPLIT_TAG_STR);
+                Elements courseElements = Jsoup.parse(girdHtml).getElementsByTag("div");
+                for (Element courseElement : courseElements) {
+                    String courseName = courseElement.ownText();
+
                    // 格子文本为空，说明这个格子没课，直接跳过这个格子就行了
-                    if ("".equals(courseNames[i])) {
+                    if ("".equals(courseName)) {
                        continue;
                    }

+                    // 获取当前课程元素里的关键字段，每个字段只取第一个匹配的元素（下标0）
+                    Elements classElements = courseElement.getElementsByAttributeValue("title", "课堂名称");
+                    Elements teacherElements = courseElement.getElementsByAttributeValue("title", "老师");
+                    Elements timeElements = courseElement.getElementsByAttributeValue("title", "周次(节次)");
+                    Elements classroomElements = courseElement.getElementsByAttributeValue("title", "教室");
+
                    Course course = new Course();

-                    course.setName(courseNames[i]);
-                    course.setTeachClass(classElements.isEmpty() ? "" : classElements.get(i).text());
-                    course.setTeacher(teacherElements.isEmpty() ? "" : teacherElements.get(i).text());
+                    course.setName(courseName);
+                    course.setTeachClass(classElements.isEmpty() ? "" : classElements.get(0).text());
+                    course.setTeacher(teacherElements.isEmpty() ? "" : teacherElements.get(0).text());

                    ClassRoom classRoom = new ClassRoom();
-                    classRoom.setRoom(classroomElements.isEmpty() ? "" : classroomElements.get(i).text());
+                    classRoom.setRoom(classroomElements.isEmpty() ? "" : classroomElements.get(0).text());
                    course.setClassroom(classRoom);

-                    // 靠行位置来确定节次，而不是靠time字段的节次数据确定（因为太不好处理了）
-                    // 具体算法就是行索引x2 + 1就是开始的节次（索引从0开始）
-                    // 对于只有一个小节的课程，这类课程多数是在线课程，实际选课的时候照样会和其他课冲突，因此这里一律按照两小节大课处理
-                    int lineIndex = (girdCount - 1) / 7;
-                    course.setStartSection(lineIndex * 2 + 1);
-                    course.setEndSection(lineIndex * 2 + 2);
-
-                    int weekDay = girdCount % 7;
-                    course.setWeekDay(weekDay == 0 ? 7 : weekDay);
-
-                    // 提取周次信息，根据老项目，可能会有用","分成两段周次信息，但根据实际测试没有发现类似的课程
-                    String time = timeElements.isEmpty() ? "" : timeElements.get(i).text();
+                    // 提取周次信息
+                    String time = timeElements.isEmpty() ? "" : timeElements.get(0).text();
                    Matcher matcher = WEEK_RANGE_REGEX.matcher(time);
                    if (matcher.find()) {
                        course.setStartWeek(Integer.parseInt(matcher.group("startWeek")));
@ -93,6 +86,15 @@ public class CourseTableParser implements Parser<List<Course>> {
                        }
                    }

+                    // 靠行位置来确定节次，而不是靠time字段的节次数据确定（因为太不好处理了）
+                    // 具体算法就是行索引x2 + 1就是开始的节次（索引从0开始）
+                    int lineIndex = (int) (girdCount * 0.142);
+                    course.setStartSection(lineIndex * 2 + 1);
+                    course.setEndSection(lineIndex * 2 + 2);
+
+                    int weekDay = girdCount % 7;
+                    course.setWeekDay(weekDay == 0 ? 7 : weekDay);
+
                    courses.add(course);
                }
            }
--- a/mywust-core/src/main/java/cn/linghang/mywust/core/service/auth/JwcLogin.java
+++ b/mywust-core/src/main/java/cn/linghang/mywust/core/service/auth/JwcLogin.java
@ -51,15 +51,15 @@ public class JwcLogin {
    }

    private static final int COOKIES_ERROR_RESPONSE_LENGTH =
-            ("<script languge='javascript'>window.location.href=" +
-                    "'https://auth.wust.edu.cn/lyuapServer/login?service=http%3A%2F%2Fbkjx.wust.edu.cn%2Fjsxsd%2Fxxwcqk%2Fxxwcqk_idxOnzh.do'" +
+            ("<script languge='javascript'>" +
+                    "window.location.href='https://auth.wust.edu.cn/lyuapServer/login?service=http%3A%2F%2Fbkjx.wust.edu.cn%2Fjsxsd%2Fframework%2FblankPage.jsp'" +
                    "</script>").length();

    public boolean checkCookies(String cookies, RequestClientOption option) throws IOException {
        HttpRequest testRequest = BkjxRequestFactory.makeHttpRequest(Bkjx.BKJX_TEST_API, null, cookies);
        HttpResponse testResponse = requester.get(testRequest, option);

-        // 判断响应长度是否为178个字，登录跳转响应长度
+        // 判断响应长度是否为这么多个字，登录跳转响应长度
        // 这种办法虽然在极端情况下可能会出错（而且还挺蠢的），但是是最快的办法中比较准确的了
        return testResponse.getBody().length != COOKIES_ERROR_RESPONSE_LENGTH;
    }
--- a/mywust-test/src/test/java/CourseTableTest.java
+++ b/mywust-test/src/test/java/CourseTableTest.java
@ -17,7 +17,7 @@ public class CourseTableTest {
    }

    private void run() throws BasicException, IOException {
-        System.out.println("成绩获取");
+        System.out.println("课表");
        System.out.println("Cookie：");

        Scanner scanner = new Scanner(System.in);
@ -41,7 +41,7 @@ public class CourseTableTest {
        option.setProxy(proxy);
        option.setFallowUrlRedirect(false);

-        List<Course> courses = service.getCourseTable("2023-2024-2", cookie, option);
+        List<Course> courses = service.getCourseTable(term, cookie, option);

        for (Course info : courses) {
            System.out.println(info);