From b78577fc28a51e0a1f256876a0b3b8175cbed0c4 Mon Sep 17 00:00:00 2001 From: lensferno Date: Wed, 26 Oct 2022 09:34:22 +0800 Subject: [PATCH] =?UTF-8?q?=E7=89=A9=E7=90=86=E5=AE=9E=E9=AA=8C=E8=AF=BE?= =?UTF-8?q?=E8=A1=A8=E8=A7=A3=E6=9E=90=EF=BC=88=E6=9C=AA=E6=B6=89=E5=8F=8A?= =?UTF-8?q?=E8=8E=B7=E5=8F=96=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mywust-core/pom.xml | 9 +- .../exception/HtmlPageParseException.java | 4 - .../mywust/core/exception/ParseException.java | 4 + .../parser/HuangjiahuClassroomNameParser.java | 55 ++++++++++++ .../linghang/mywust/core/parser/Parser.java | 4 +- .../physics/PhysicsCoursePageParser.java | 85 +++++++++++++++++++ .../physics/PhysicsIndexPageParser.java | 6 +- .../physics/xpath/PhysicsCourseXpath.java | 13 +++ .../undergraduate/StudentInfoPageParser.java | 6 +- .../service/undergraduate/JwcService.java | 4 +- .../linghang/mywust/model/PhysicsCourse.java | 11 +++ .../mywust/model/global/ClassRoom.java | 32 +++++++ .../linghang/mywust/model/global/Course.java | 75 ++++++++++++++++ mywust-test/pom.xml | 25 +++++- 14 files changed, 316 insertions(+), 17 deletions(-) delete mode 100644 mywust-core/src/main/java/cn/linghang/mywust/core/exception/HtmlPageParseException.java create mode 100644 mywust-core/src/main/java/cn/linghang/mywust/core/exception/ParseException.java create mode 100644 mywust-core/src/main/java/cn/linghang/mywust/core/parser/HuangjiahuClassroomNameParser.java create mode 100644 mywust-core/src/main/java/cn/linghang/mywust/core/parser/physics/PhysicsCoursePageParser.java create mode 100644 mywust-core/src/main/java/cn/linghang/mywust/core/parser/physics/xpath/PhysicsCourseXpath.java create mode 100644 mywust-model/src/main/java/cn/linghang/mywust/model/PhysicsCourse.java create mode 100644 mywust-model/src/main/java/cn/linghang/mywust/model/global/ClassRoom.java create mode 100644 mywust-model/src/main/java/cn/linghang/mywust/model/global/Course.java diff --git a/mywust-core/pom.xml b/mywust-core/pom.xml index 824bc92..88eb960 100644 --- a/mywust-core/pom.xml +++ b/mywust-core/pom.xml @@ -26,7 +26,7 @@ org.slf4j slf4j-api - 1.7.36 + 2.0.3 @@ -42,6 +42,13 @@ 1.15.3 + + + oro + oro + 2.0.8 + + cn.linghang mywust-model diff --git a/mywust-core/src/main/java/cn/linghang/mywust/core/exception/HtmlPageParseException.java b/mywust-core/src/main/java/cn/linghang/mywust/core/exception/HtmlPageParseException.java deleted file mode 100644 index 8bd82cd..0000000 --- a/mywust-core/src/main/java/cn/linghang/mywust/core/exception/HtmlPageParseException.java +++ /dev/null @@ -1,4 +0,0 @@ -package cn.linghang.mywust.core.exception; - -public class HtmlPageParseException extends BasicException { -} diff --git a/mywust-core/src/main/java/cn/linghang/mywust/core/exception/ParseException.java b/mywust-core/src/main/java/cn/linghang/mywust/core/exception/ParseException.java new file mode 100644 index 0000000..92a6ae7 --- /dev/null +++ b/mywust-core/src/main/java/cn/linghang/mywust/core/exception/ParseException.java @@ -0,0 +1,4 @@ +package cn.linghang.mywust.core.exception; + +public class ParseException extends BasicException { +} diff --git a/mywust-core/src/main/java/cn/linghang/mywust/core/parser/HuangjiahuClassroomNameParser.java b/mywust-core/src/main/java/cn/linghang/mywust/core/parser/HuangjiahuClassroomNameParser.java new file mode 100644 index 0000000..002ffd6 --- /dev/null +++ b/mywust-core/src/main/java/cn/linghang/mywust/core/parser/HuangjiahuClassroomNameParser.java @@ -0,0 +1,55 @@ +package cn.linghang.mywust.core.parser; + +import cn.linghang.mywust.core.exception.ParseException; +import cn.linghang.mywust.model.global.ClassRoom; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + *

黄家湖校区教室编号解析(如12804(恒大楼二区804),30104(教三楼104), 11B304(教11B区304)这种)

+ *

具体的教学楼名称和区域不作生成,只对相应的字段结构进行拆分解析,由调用者自行决定具体名称

+ * + * @author lensfrex + * @create 2022-10-26 08:56 + */ +public class HuangjiahuClassroomNameParser implements Parser { + private static final Logger log = LoggerFactory.getLogger(HuangjiahuClassroomNameParser.class); + + private static final Pattern CLASSROOM_PATTERN = Pattern.compile("(?\\d)(?\\d)(?\\d{3})"); + + private static final Pattern BUILDING_11_CLASSROOM_PATTERN = Pattern.compile("11(?[A-C])(?\\d{3})"); + + @Override + public ClassRoom parse(String classroomName) throws ParseException { + ClassRoom classRoom = ClassRoom.builder().campus("黄家湖").build(); + try { + Matcher matcher = CLASSROOM_PATTERN.matcher(classroomName); + // 不匹配普通教学楼正则的多半就是教11的教室 + if (matcher.find()) { + classRoom.setBuilding(matcher.group("buildingId")); + classRoom.setArea(matcher.group("areaId")); + classRoom.setRoom(matcher.group("room")); + } else { + matcher = BUILDING_11_CLASSROOM_PATTERN.matcher(classroomName); + if (matcher.find()) { + classRoom.setBuilding("11"); + classRoom.setArea(matcher.group("areaId")); + classRoom.setRoom(matcher.group("room")); + } else { + // 解析都不匹配就直接用传进来的编号作为教室 + classRoom.setBuilding("未知"); + classRoom.setArea("未知"); + classRoom.setRoom(classroomName); + } + } + } catch (Exception e) { + log.warn("解析教室编号失败,教室:{}", classroomName); + throw new ParseException(); + } + + return classRoom; + } +} diff --git a/mywust-core/src/main/java/cn/linghang/mywust/core/parser/Parser.java b/mywust-core/src/main/java/cn/linghang/mywust/core/parser/Parser.java index 0c4d137..1685a9b 100644 --- a/mywust-core/src/main/java/cn/linghang/mywust/core/parser/Parser.java +++ b/mywust-core/src/main/java/cn/linghang/mywust/core/parser/Parser.java @@ -1,7 +1,7 @@ package cn.linghang.mywust.core.parser; -import cn.linghang.mywust.core.exception.HtmlPageParseException; +import cn.linghang.mywust.core.exception.ParseException; public interface Parser { - public T parse(String html) throws HtmlPageParseException; + public T parse(String html) throws ParseException; } diff --git a/mywust-core/src/main/java/cn/linghang/mywust/core/parser/physics/PhysicsCoursePageParser.java b/mywust-core/src/main/java/cn/linghang/mywust/core/parser/physics/PhysicsCoursePageParser.java new file mode 100644 index 0000000..c56965a --- /dev/null +++ b/mywust-core/src/main/java/cn/linghang/mywust/core/parser/physics/PhysicsCoursePageParser.java @@ -0,0 +1,85 @@ +package cn.linghang.mywust.core.parser.physics; + +import cn.linghang.mywust.core.exception.ParseException; +import cn.linghang.mywust.core.parser.HuangjiahuClassroomNameParser; +import cn.linghang.mywust.core.parser.Parser; +import cn.linghang.mywust.core.parser.physics.xpath.PhysicsCourseXpath; +import cn.linghang.mywust.model.PhysicsCourse; +import cn.linghang.mywust.model.global.ClassRoom; +import cn.linghang.mywust.model.global.Course; +import org.jsoup.Jsoup; +import org.jsoup.select.Elements; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class PhysicsCoursePageParser implements Parser> { + private static final HuangjiahuClassroomNameParser HUANGJIAHU_CLASSROOM_NAME_PARSER = new HuangjiahuClassroomNameParser(); + + // 第几周 + private static final Pattern PHYSICS_COURSE_WEEK_PATTERN = Pattern.compile("第(?.*?)周"); + + // 星期 + private static final Pattern PHYSICS_COURSE_WEEKDAY_PATTERN = Pattern.compile("星期(?[一二三四五六七日天]?)"); + + // 开始和结束时间 + private static final Pattern PHYSICS_COURSE_START_END_PATTERN = Pattern.compile("(?\\d+?)[&\\-|~至](?\\d+)节"); + + // 日期时间 + private static final Pattern PHYSICS_COURSE_DATE_PATTERN = Pattern.compile("(?\\d{4}-\\d{2}-\\d{2}?)"); + + @Override + public List parse(String html) throws ParseException { + Elements courseElements = Jsoup.parse(html).selectXpath(PhysicsCourseXpath.COURSE_ROWS_XPATH); + if (courseElements.isEmpty()) { + throw new ParseException(); + } + + List courses = new ArrayList<>(courseElements.size()); + + // 从1开始,跳过表头 + for (int i = 1; i < courseElements.size(); i++) { + Elements columnContextElements = courseElements.get(i).getElementsByTag("td"); + PhysicsCourse course = new PhysicsCourse(); + + // 这里的代码硬编码了,不是很规范,抱歉 + course.setName(columnContextElements.get(1).text()); + course.setTeacher(columnContextElements.get(3).text().replace('\uE863', '䶮')); + + String classroomNumber = columnContextElements.get(5).text(); + ClassRoom classRoom = HUANGJIAHU_CLASSROOM_NAME_PARSER.parse(classroomNumber); + course.setClassroom(classRoom); + + String time = columnContextElements.get(4).text(); + + Matcher weekMatcher = PHYSICS_COURSE_WEEK_PATTERN.matcher(time); + if (weekMatcher.find()) { + // 物理实验,一个只有一节(一周),所以开始周和结束周是一样的 + course.setStartWeek(Integer.parseInt(weekMatcher.group("week"))); + course.setEndWeek(course.getStartWeek()); + } + + Matcher weekDayMatcher = PHYSICS_COURSE_WEEKDAY_PATTERN.matcher(time); + if (weekDayMatcher.find()) { + course.setWeekDay(Course.WEEKDAY_MAP.getOrDefault(weekDayMatcher.group("weekDay"), 1)); + } + + Matcher startEndMatcher = PHYSICS_COURSE_START_END_PATTERN.matcher(time); + if (startEndMatcher.find()) { + course.setStart(Integer.parseInt(startEndMatcher.group("start"))); + course.setEnd(Integer.parseInt(startEndMatcher.group("end"))); + } + + Matcher dateMatcher = PHYSICS_COURSE_DATE_PATTERN.matcher(time); + if (dateMatcher.find()) { + course.setDate(dateMatcher.group("date")); + } + + courses.add(course); + } + + return courses; + } +} diff --git a/mywust-core/src/main/java/cn/linghang/mywust/core/parser/physics/PhysicsIndexPageParser.java b/mywust-core/src/main/java/cn/linghang/mywust/core/parser/physics/PhysicsIndexPageParser.java index 4989a57..76cb4a4 100644 --- a/mywust-core/src/main/java/cn/linghang/mywust/core/parser/physics/PhysicsIndexPageParser.java +++ b/mywust-core/src/main/java/cn/linghang/mywust/core/parser/physics/PhysicsIndexPageParser.java @@ -1,6 +1,6 @@ package cn.linghang.mywust.core.parser.physics; -import cn.linghang.mywust.core.exception.HtmlPageParseException; +import cn.linghang.mywust.core.exception.ParseException; import cn.linghang.mywust.core.parser.Parser; import cn.linghang.mywust.core.parser.physics.xpath.PhysicsIndexXpath; import org.jsoup.Jsoup; @@ -8,11 +8,11 @@ import org.jsoup.nodes.Document; import org.jsoup.select.Elements; public class PhysicsIndexPageParser implements Parser { - public String parse(String html) throws HtmlPageParseException { + public String parse(String html) throws ParseException { Document page = Jsoup.parse(html); Elements linkElements = page.selectXpath(PhysicsIndexXpath.PHYSICS_LINK_XPATH); if (linkElements.isEmpty()) { - throw new HtmlPageParseException(); + throw new ParseException(); } return linkElements.get(0).attr("href"); diff --git a/mywust-core/src/main/java/cn/linghang/mywust/core/parser/physics/xpath/PhysicsCourseXpath.java b/mywust-core/src/main/java/cn/linghang/mywust/core/parser/physics/xpath/PhysicsCourseXpath.java new file mode 100644 index 0000000..fcc9fdf --- /dev/null +++ b/mywust-core/src/main/java/cn/linghang/mywust/core/parser/physics/xpath/PhysicsCourseXpath.java @@ -0,0 +1,13 @@ +package cn.linghang.mywust.core.parser.physics.xpath; + +public class PhysicsCourseXpath { + /** + * 用于获取表格中所有行的xpath + * */ + public static final String COURSE_ROWS_XPATH = "//*[@id=\"ID_PEE110301_gvpee120101\"]/tbody/tr"; + + public static final String COURSE_ROW_NAME_XPATH = ""; + public static final String COURSE_ROW_TEACHER_XPATH = ""; + public static final String COURSE_ROW_TIME_XPATH = ""; + public static final String COURSE_ROW_CLASSROOM_XPATH = ""; +} diff --git a/mywust-core/src/main/java/cn/linghang/mywust/core/parser/undergraduate/StudentInfoPageParser.java b/mywust-core/src/main/java/cn/linghang/mywust/core/parser/undergraduate/StudentInfoPageParser.java index c7c99e3..7338a86 100644 --- a/mywust-core/src/main/java/cn/linghang/mywust/core/parser/undergraduate/StudentInfoPageParser.java +++ b/mywust-core/src/main/java/cn/linghang/mywust/core/parser/undergraduate/StudentInfoPageParser.java @@ -1,6 +1,6 @@ package cn.linghang.mywust.core.parser.undergraduate; -import cn.linghang.mywust.core.exception.HtmlPageParseException; +import cn.linghang.mywust.core.exception.ParseException; import cn.linghang.mywust.core.parser.Parser; import cn.linghang.mywust.core.parser.undergraduate.xpath.StudentInfoXpath; import cn.linghang.mywust.model.undergrade.StudentInfo; @@ -11,11 +11,11 @@ import org.jsoup.select.Elements; public class StudentInfoPageParser implements Parser { - public StudentInfo parse(String html) throws HtmlPageParseException { + public StudentInfo parse(String html) throws ParseException { Document page = Jsoup.parse(html); Element table = page.getElementById("xjkpTable"); if (table == null) { - throw new HtmlPageParseException(); + throw new ParseException(); } Elements studentElements = table.selectXpath(StudentInfoXpath.STUDENT_NUMBER); diff --git a/mywust-core/src/main/java/cn/linghang/mywust/core/service/undergraduate/JwcService.java b/mywust-core/src/main/java/cn/linghang/mywust/core/service/undergraduate/JwcService.java index 5c30c26..5977004 100644 --- a/mywust-core/src/main/java/cn/linghang/mywust/core/service/undergraduate/JwcService.java +++ b/mywust-core/src/main/java/cn/linghang/mywust/core/service/undergraduate/JwcService.java @@ -1,7 +1,7 @@ package cn.linghang.mywust.core.service.undergraduate; import cn.linghang.mywust.core.exception.CookieInvalidException; -import cn.linghang.mywust.core.exception.HtmlPageParseException; +import cn.linghang.mywust.core.exception.ParseException; import cn.linghang.mywust.core.parser.undergraduate.StudentInfoPageParser; import cn.linghang.mywust.core.request.BkjxRequestFactory; import cn.linghang.mywust.core.util.BkjxUtil; @@ -42,7 +42,7 @@ public class JwcService { return new String(response.getBody()); } - public StudentInfo getStudentInfo(String cookies, RequestClientOption requestOption) throws IOException, CookieInvalidException, HtmlPageParseException { + public StudentInfo getStudentInfo(String cookies, RequestClientOption requestOption) throws IOException, CookieInvalidException, ParseException { String studentInfoPage = this.getStudentInfoPage(cookies, requestOption); return studentInfoPageParser.parse(studentInfoPage); diff --git a/mywust-model/src/main/java/cn/linghang/mywust/model/PhysicsCourse.java b/mywust-model/src/main/java/cn/linghang/mywust/model/PhysicsCourse.java new file mode 100644 index 0000000..6ccffc2 --- /dev/null +++ b/mywust-model/src/main/java/cn/linghang/mywust/model/PhysicsCourse.java @@ -0,0 +1,11 @@ +package cn.linghang.mywust.model; + +import cn.linghang.mywust.model.global.Course; +import lombok.Getter; +import lombok.Setter; + +@Getter +@Setter +public class PhysicsCourse extends Course { + private String date; +} diff --git a/mywust-model/src/main/java/cn/linghang/mywust/model/global/ClassRoom.java b/mywust-model/src/main/java/cn/linghang/mywust/model/global/ClassRoom.java new file mode 100644 index 0000000..c9dc3be --- /dev/null +++ b/mywust-model/src/main/java/cn/linghang/mywust/model/global/ClassRoom.java @@ -0,0 +1,32 @@ +package cn.linghang.mywust.model.global; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@AllArgsConstructor +@NoArgsConstructor +public class ClassRoom { + /** + * 校区,黄家湖或青山 + */ + private String campus; + + /** + * 教学楼编号,如1, 2, 3, 11,对应教1楼(恒大楼),教2楼(理学院),教3楼(计院),教11 + */ + private String building; + + /** + * 教学楼区域编号,如0,1,A,B等,对应0区(没有分区),1区,A区(教11) + */ + private String area; + + /** + * 教室名,如301,208,802 + */ + private String room; +} diff --git a/mywust-model/src/main/java/cn/linghang/mywust/model/global/Course.java b/mywust-model/src/main/java/cn/linghang/mywust/model/global/Course.java new file mode 100644 index 0000000..d8a519e --- /dev/null +++ b/mywust-model/src/main/java/cn/linghang/mywust/model/global/Course.java @@ -0,0 +1,75 @@ +package cn.linghang.mywust.model.global; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +import java.util.HashMap; +import java.util.Map; + +@Data +@Builder +@AllArgsConstructor +@NoArgsConstructor +public class Course { + /** + * 课程名称 + */ + private String name; + + /** + * 教师名称 + */ + private String teacher; + + /** + * 教学班 + */ + private String teachClass; + + /** + * 开始周 + */ + private int startWeek; + + /** + * 结束周 + */ + private int endWeek; + + /** + * 星期几 + */ + private int weekDay; + + /** + * 开始时间 + */ + private int start; + + /** + * 结束时间 + */ + private int end; + + private ClassRoom classroom; + + public static final Map WEEKDAY_MAP = makeWeekdayMap(); + + private static Map makeWeekdayMap() { + HashMap map = new HashMap<>(7 + 2); + + map.put("一", 1); + map.put("二", 2); + map.put("三", 3); + map.put("四", 4); + map.put("五", 5); + map.put("六", 6); + map.put("七", 7); + map.put("日", 7); + map.put("天", 7); + + return map; + } +} diff --git a/mywust-test/pom.xml b/mywust-test/pom.xml index 2773ef3..9866c01 100644 --- a/mywust-test/pom.xml +++ b/mywust-test/pom.xml @@ -24,13 +24,34 @@ ch.qos.logback logback-classic - 1.2.11 + 1.4.4 ch.qos.logback logback-core - 1.2.11 + 1.4.4 + + + org.dom4j + dom4j + 2.1.3 + + + + + net.sourceforge.jregex + jregex + 1.2_01 + + + + oro + oro + 2.0.8 + + +