物理实验课表解析(未涉及获取)

old-package
lensfrex 2 years ago
parent 0904df44e4
commit b78577fc28
Signed by: lensfrex
GPG Key ID: 0F69A0A2FBEE98A0
  1. 9
      mywust-core/pom.xml
  2. 4
      mywust-core/src/main/java/cn/linghang/mywust/core/exception/HtmlPageParseException.java
  3. 4
      mywust-core/src/main/java/cn/linghang/mywust/core/exception/ParseException.java
  4. 55
      mywust-core/src/main/java/cn/linghang/mywust/core/parser/HuangjiahuClassroomNameParser.java
  5. 4
      mywust-core/src/main/java/cn/linghang/mywust/core/parser/Parser.java
  6. 85
      mywust-core/src/main/java/cn/linghang/mywust/core/parser/physics/PhysicsCoursePageParser.java
  7. 6
      mywust-core/src/main/java/cn/linghang/mywust/core/parser/physics/PhysicsIndexPageParser.java
  8. 13
      mywust-core/src/main/java/cn/linghang/mywust/core/parser/physics/xpath/PhysicsCourseXpath.java
  9. 6
      mywust-core/src/main/java/cn/linghang/mywust/core/parser/undergraduate/StudentInfoPageParser.java
  10. 4
      mywust-core/src/main/java/cn/linghang/mywust/core/service/undergraduate/JwcService.java
  11. 11
      mywust-model/src/main/java/cn/linghang/mywust/model/PhysicsCourse.java
  12. 32
      mywust-model/src/main/java/cn/linghang/mywust/model/global/ClassRoom.java
  13. 75
      mywust-model/src/main/java/cn/linghang/mywust/model/global/Course.java
  14. 25
      mywust-test/pom.xml

@ -26,7 +26,7 @@
<dependency> <dependency>
<groupId>org.slf4j</groupId> <groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId> <artifactId>slf4j-api</artifactId>
<version>1.7.36</version> <version>2.0.3</version>
</dependency> </dependency>
<dependency> <dependency>
@ -42,6 +42,13 @@
<version>1.15.3</version> <version>1.15.3</version>
</dependency> </dependency>
<!-- https://mvnrepository.com/artifact/oro/oro -->
<dependency>
<groupId>oro</groupId>
<artifactId>oro</artifactId>
<version>2.0.8</version>
</dependency>
<dependency> <dependency>
<groupId>cn.linghang</groupId> <groupId>cn.linghang</groupId>
<artifactId>mywust-model</artifactId> <artifactId>mywust-model</artifactId>

@ -1,4 +0,0 @@
package cn.linghang.mywust.core.exception;
/**
* Exception type declared by parsers for HTML page parsing failures.
* It declares no members of its own; everything is inherited from {@code BasicException}.
*/
public class HtmlPageParseException extends BasicException {
}

@ -0,0 +1,4 @@
package cn.linghang.mywust.core.exception;
/**
* Exception signalling that the input handed to a parser could not be parsed.
* It declares no members of its own; everything is inherited from {@code BasicException}.
*/
public class ParseException extends BasicException {
}

@ -0,0 +1,55 @@
package cn.linghang.mywust.core.parser;
import cn.linghang.mywust.core.exception.ParseException;
import cn.linghang.mywust.model.global.ClassRoom;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Parses Huangjiahu-campus classroom codes into a {@link ClassRoom}.
 *
 * <p>Examples: {@code 12804} (building 1, area 2, room 804), {@code 30104}
 * (building 3, area 0, room 104) and {@code 11B304} (building 11, area B, room 304).</p>
 * <p>Only the structural fields are split out. Human-readable building and area names are
 * not generated here; the caller decides how to name them.</p>
 *
 * @author lensfrex
 * @create 2022-10-26 08:56
 */
public class HuangjiahuClassroomNameParser implements Parser<ClassRoom> {
    private static final Logger log = LoggerFactory.getLogger(HuangjiahuClassroomNameParser.class);

    /** Five consecutive digits: one for the building, one for the area, three for the room. */
    private static final Pattern CLASSROOM_PATTERN = Pattern.compile("(?<buildingId>\\d)(?<areaId>\\d)(?<room>\\d{3})");

    /** Building 11 codes: the literal {@code 11}, an area letter A-C, then a three-digit room. */
    private static final Pattern BUILDING_11_CLASSROOM_PATTERN = Pattern.compile("11(?<areaId>[A-C])(?<room>\\d{3})");

    /**
     * Splits a classroom code into building, area and room.
     *
     * <p>The campus is always set to "黄家湖". Both patterns are applied with
     * {@link Matcher#find()}, so a code embedded in surrounding text is still recognised.
     * When neither pattern matches, building and area are set to "未知" and the raw input
     * is used as the room.</p>
     *
     * @param classroomName the classroom code to parse
     * @return the populated classroom
     * @throws ParseException if any exception is raised while matching (for example when
     *                        {@code classroomName} is {@code null})
     */
    @Override
    public ClassRoom parse(String classroomName) throws ParseException {
        ClassRoom classRoom = ClassRoom.builder().campus("黄家湖").build();

        try {
            Matcher ordinaryMatcher = CLASSROOM_PATTERN.matcher(classroomName);
            if (ordinaryMatcher.find()) {
                classRoom.setBuilding(ordinaryMatcher.group("buildingId"));
                classRoom.setArea(ordinaryMatcher.group("areaId"));
                classRoom.setRoom(ordinaryMatcher.group("room"));
                return classRoom;
            }

            // Codes that do not match the ordinary pattern are most likely building 11 rooms.
            Matcher building11Matcher = BUILDING_11_CLASSROOM_PATTERN.matcher(classroomName);
            if (building11Matcher.find()) {
                classRoom.setBuilding("11");
                classRoom.setArea(building11Matcher.group("areaId"));
                classRoom.setRoom(building11Matcher.group("room"));
                return classRoom;
            }

            // Nothing matched: fall back to using the raw code as the room.
            classRoom.setBuilding("未知");
            classRoom.setArea("未知");
            classRoom.setRoom(classroomName);
            return classRoom;
        } catch (Exception e) {
            // ParseException only has a no-arg constructor, so the cause cannot travel with it;
            // pass it to the logger as the trailing argument so the stack trace is not lost.
            log.warn("解析教室编号失败,教室:{}", classroomName, e);
            throw new ParseException();
        }
    }
}

@ -1,7 +1,7 @@
package cn.linghang.mywust.core.parser; package cn.linghang.mywust.core.parser;
import cn.linghang.mywust.core.exception.HtmlPageParseException; import cn.linghang.mywust.core.exception.ParseException;
public interface Parser<T> { public interface Parser<T> {
public T parse(String html) throws HtmlPageParseException; public T parse(String html) throws ParseException;
} }

@ -0,0 +1,85 @@
package cn.linghang.mywust.core.parser.physics;
import cn.linghang.mywust.core.exception.ParseException;
import cn.linghang.mywust.core.parser.HuangjiahuClassroomNameParser;
import cn.linghang.mywust.core.parser.Parser;
import cn.linghang.mywust.core.parser.physics.xpath.PhysicsCourseXpath;
import cn.linghang.mywust.model.PhysicsCourse;
import cn.linghang.mywust.model.global.ClassRoom;
import cn.linghang.mywust.model.global.Course;
import org.jsoup.Jsoup;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Parses the physics-experiment course table page into a list of {@link PhysicsCourse}.
 *
 * <p>Rows are selected with {@link PhysicsCourseXpath#COURSE_ROWS_XPATH}; the first selected
 * row is treated as the table header and skipped.</p>
 */
public class PhysicsCoursePageParser implements Parser<List<PhysicsCourse>> {
    private static final HuangjiahuClassroomNameParser HUANGJIAHU_CLASSROOM_NAME_PARSER = new HuangjiahuClassroomNameParser();

    // Week number: the text between "第" and "周"
    private static final Pattern PHYSICS_COURSE_WEEK_PATTERN = Pattern.compile("第(?<week>.*?)周");

    // Day of the week: the character following "星期"
    private static final Pattern PHYSICS_COURSE_WEEKDAY_PATTERN = Pattern.compile("星期(?<weekDay>[一二三四五六七日天]?)");

    // First and last section of the session, e.g. "3-4节"
    private static final Pattern PHYSICS_COURSE_START_END_PATTERN = Pattern.compile("(?<start>\\d+?)[&\\-|~至](?<end>\\d+)节");

    // Calendar date of the session
    private static final Pattern PHYSICS_COURSE_DATE_PATTERN = Pattern.compile("(?<date>\\d{4}-\\d{2}-\\d{2}?)");

    // Zero-based positions of the <td> cells read from each row.
    private static final int NAME_COLUMN = 1;
    private static final int TEACHER_COLUMN = 3;
    private static final int TIME_COLUMN = 4;
    private static final int CLASSROOM_COLUMN = 5;
    private static final int MIN_COLUMN_COUNT = CLASSROOM_COLUMN + 1;

    /**
     * Extracts every course row from the given page.
     *
     * @param html the HTML of the physics-experiment course page
     * @return the parsed courses; empty when the table only contains the header row
     * @throws ParseException if no table rows are found, a classroom code cannot be parsed,
     *                        or a week/section number is not a valid integer
     */
    @Override
    public List<PhysicsCourse> parse(String html) throws ParseException {
        Elements courseElements = Jsoup.parse(html).selectXpath(PhysicsCourseXpath.COURSE_ROWS_XPATH);
        if (courseElements.isEmpty()) {
            throw new ParseException();
        }

        List<PhysicsCourse> courses = new ArrayList<>(courseElements.size());

        // Start at 1 to skip the header row.
        for (int i = 1; i < courseElements.size(); i++) {
            Elements columnContextElements = courseElements.get(i).getElementsByTag("td");

            // Rows without the expected cells (e.g. pager or spacer rows) carry no course data;
            // skipping them avoids an IndexOutOfBoundsException on the fixed column indices.
            if (columnContextElements.size() < MIN_COLUMN_COUNT) {
                continue;
            }

            courses.add(parseRow(columnContextElements));
        }

        return courses;
    }

    /** Builds one course from the cells of a single table row. */
    private static PhysicsCourse parseRow(Elements cells) throws ParseException {
        PhysicsCourse course = new PhysicsCourse();

        course.setName(cells.get(NAME_COLUMN).text());
        // U+E863 is a private-use code point; it is replaced with the character '䶮'.
        course.setTeacher(cells.get(TEACHER_COLUMN).text().replace('\uE863', '䶮'));

        String classroomNumber = cells.get(CLASSROOM_COLUMN).text();
        ClassRoom classRoom = HUANGJIAHU_CLASSROOM_NAME_PARSER.parse(classroomNumber);
        course.setClassroom(classRoom);

        fillTime(course, cells.get(TIME_COLUMN).text());

        return course;
    }

    /** Fills week, weekday, section range and date from the free-text time cell. */
    private static void fillTime(PhysicsCourse course, String time) throws ParseException {
        Matcher weekMatcher = PHYSICS_COURSE_WEEK_PATTERN.matcher(time);
        if (weekMatcher.find()) {
            // A physics experiment is a single session within one week,
            // so the start week and the end week are the same.
            int week = parseNumber(weekMatcher.group("week"));
            course.setStartWeek(week);
            course.setEndWeek(week);
        }

        Matcher weekDayMatcher = PHYSICS_COURSE_WEEKDAY_PATTERN.matcher(time);
        if (weekDayMatcher.find()) {
            course.setWeekDay(Course.WEEKDAY_MAP.getOrDefault(weekDayMatcher.group("weekDay"), 1));
        }

        Matcher startEndMatcher = PHYSICS_COURSE_START_END_PATTERN.matcher(time);
        if (startEndMatcher.find()) {
            course.setStart(parseNumber(startEndMatcher.group("start")));
            course.setEnd(parseNumber(startEndMatcher.group("end")));
        }

        Matcher dateMatcher = PHYSICS_COURSE_DATE_PATTERN.matcher(time);
        if (dateMatcher.find()) {
            course.setDate(dateMatcher.group("date"));
        }
    }

    /**
     * Parses an integer captured from the page, reporting malformed text through the
     * declared {@link ParseException} instead of an unchecked NumberFormatException.
     */
    private static int parseNumber(String text) throws ParseException {
        try {
            return Integer.parseInt(text.trim());
        } catch (NumberFormatException e) {
            // ParseException only offers a no-arg constructor, so the cause cannot be attached.
            throw new ParseException();
        }
    }
}

@ -1,6 +1,6 @@
package cn.linghang.mywust.core.parser.physics; package cn.linghang.mywust.core.parser.physics;
import cn.linghang.mywust.core.exception.HtmlPageParseException; import cn.linghang.mywust.core.exception.ParseException;
import cn.linghang.mywust.core.parser.Parser; import cn.linghang.mywust.core.parser.Parser;
import cn.linghang.mywust.core.parser.physics.xpath.PhysicsIndexXpath; import cn.linghang.mywust.core.parser.physics.xpath.PhysicsIndexXpath;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
@ -8,11 +8,11 @@ import org.jsoup.nodes.Document;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
public class PhysicsIndexPageParser implements Parser<String> { public class PhysicsIndexPageParser implements Parser<String> {
public String parse(String html) throws HtmlPageParseException { public String parse(String html) throws ParseException {
Document page = Jsoup.parse(html); Document page = Jsoup.parse(html);
Elements linkElements = page.selectXpath(PhysicsIndexXpath.PHYSICS_LINK_XPATH); Elements linkElements = page.selectXpath(PhysicsIndexXpath.PHYSICS_LINK_XPATH);
if (linkElements.isEmpty()) { if (linkElements.isEmpty()) {
throw new HtmlPageParseException(); throw new ParseException();
} }
return linkElements.get(0).attr("href"); return linkElements.get(0).attr("href");

@ -0,0 +1,13 @@
package cn.linghang.mywust.core.parser.physics.xpath;
/**
 * XPath expressions for the physics-experiment course table page.
 */
public class PhysicsCourseXpath {
    // Per-column expressions. All four are currently empty strings.
    public static final String COURSE_ROW_CLASSROOM_XPATH = "";

    public static final String COURSE_ROW_TIME_XPATH = "";

    public static final String COURSE_ROW_TEACHER_XPATH = "";

    public static final String COURSE_ROW_NAME_XPATH = "";

    /**
     * XPath that selects every row of the course table.
     */
    public static final String COURSE_ROWS_XPATH = "//*[@id=\"ID_PEE110301_gvpee120101\"]/tbody/tr";
}

@ -1,6 +1,6 @@
package cn.linghang.mywust.core.parser.undergraduate; package cn.linghang.mywust.core.parser.undergraduate;
import cn.linghang.mywust.core.exception.HtmlPageParseException; import cn.linghang.mywust.core.exception.ParseException;
import cn.linghang.mywust.core.parser.Parser; import cn.linghang.mywust.core.parser.Parser;
import cn.linghang.mywust.core.parser.undergraduate.xpath.StudentInfoXpath; import cn.linghang.mywust.core.parser.undergraduate.xpath.StudentInfoXpath;
import cn.linghang.mywust.model.undergrade.StudentInfo; import cn.linghang.mywust.model.undergrade.StudentInfo;
@ -11,11 +11,11 @@ import org.jsoup.select.Elements;
public class StudentInfoPageParser implements Parser<StudentInfo> { public class StudentInfoPageParser implements Parser<StudentInfo> {
public StudentInfo parse(String html) throws HtmlPageParseException { public StudentInfo parse(String html) throws ParseException {
Document page = Jsoup.parse(html); Document page = Jsoup.parse(html);
Element table = page.getElementById("xjkpTable"); Element table = page.getElementById("xjkpTable");
if (table == null) { if (table == null) {
throw new HtmlPageParseException(); throw new ParseException();
} }
Elements studentElements = table.selectXpath(StudentInfoXpath.STUDENT_NUMBER); Elements studentElements = table.selectXpath(StudentInfoXpath.STUDENT_NUMBER);

@ -1,7 +1,7 @@
package cn.linghang.mywust.core.service.undergraduate; package cn.linghang.mywust.core.service.undergraduate;
import cn.linghang.mywust.core.exception.CookieInvalidException; import cn.linghang.mywust.core.exception.CookieInvalidException;
import cn.linghang.mywust.core.exception.HtmlPageParseException; import cn.linghang.mywust.core.exception.ParseException;
import cn.linghang.mywust.core.parser.undergraduate.StudentInfoPageParser; import cn.linghang.mywust.core.parser.undergraduate.StudentInfoPageParser;
import cn.linghang.mywust.core.request.BkjxRequestFactory; import cn.linghang.mywust.core.request.BkjxRequestFactory;
import cn.linghang.mywust.core.util.BkjxUtil; import cn.linghang.mywust.core.util.BkjxUtil;
@ -42,7 +42,7 @@ public class JwcService {
return new String(response.getBody()); return new String(response.getBody());
} }
public StudentInfo getStudentInfo(String cookies, RequestClientOption requestOption) throws IOException, CookieInvalidException, HtmlPageParseException { public StudentInfo getStudentInfo(String cookies, RequestClientOption requestOption) throws IOException, CookieInvalidException, ParseException {
String studentInfoPage = this.getStudentInfoPage(cookies, requestOption); String studentInfoPage = this.getStudentInfoPage(cookies, requestOption);
return studentInfoPageParser.parse(studentInfoPage); return studentInfoPageParser.parse(studentInfoPage);

@ -0,0 +1,11 @@
package cn.linghang.mywust.model;
import cn.linghang.mywust.model.global.Course;
import lombok.Getter;
import lombok.Setter;
/**
* A {@link Course} that additionally carries a date.
*/
@Getter
@Setter
public class PhysicsCourse extends Course {
// Date of the session, kept as text.
// NOTE(review): presumably in yyyy-MM-dd form as supplied by the page parser; confirm.
private String date;
}

@ -0,0 +1,32 @@
package cn.linghang.mywust.model.global;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
* A classroom location split into campus, building, area and room.
*/
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class ClassRoom {
/**
* Campus: Huangjiahu (黄家湖) or Qingshan (青山)
*/
private String campus;
/**
* Building number, e.g. 1, 2, 3 or 11
* (Teaching Building 1 / Hengda Building, Teaching Building 2 / School of Science,
* Teaching Building 3 / School of Computer Science, Teaching Building 11)
*/
private String building;
/**
* Area code within the building, e.g. 0, 1, A or B:
* 0 means the building has no separate areas, 1 means area 1, A means area A (Teaching Building 11)
*/
private String area;
/**
* Room name, e.g. 301, 208 or 802
*/
private String room;
}

@ -0,0 +1,75 @@
package cn.linghang.mywust.model.global;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.util.HashMap;
import java.util.Map;
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class Course {
/**
* 课程名称
*/
private String name;
/**
* 教师名称
*/
private String teacher;
/**
* 教学班
*/
private String teachClass;
/**
* 开始周
*/
private int startWeek;
/**
* 结束周
*/
private int endWeek;
/**
* 星期几
*/
private int weekDay;
/**
* 开始时间
*/
private int start;
/**
* 结束时间
*/
private int end;
private ClassRoom classroom;
public static final Map<String, Integer> WEEKDAY_MAP = makeWeekdayMap();
private static Map<String, Integer> makeWeekdayMap() {
HashMap<String, Integer> map = new HashMap<>(7 + 2);
map.put("一", 1);
map.put("二", 2);
map.put("三", 3);
map.put("四", 4);
map.put("五", 5);
map.put("六", 6);
map.put("七", 7);
map.put("日", 7);
map.put("天", 7);
return map;
}
}

@ -24,13 +24,34 @@
<dependency> <dependency>
<groupId>ch.qos.logback</groupId> <groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId> <artifactId>logback-classic</artifactId>
<version>1.2.11</version> <version>1.4.4</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>ch.qos.logback</groupId> <groupId>ch.qos.logback</groupId>
<artifactId>logback-core</artifactId> <artifactId>logback-core</artifactId>
<version>1.2.11</version> <version>1.4.4</version>
</dependency> </dependency>
<!-- https://mvnrepository.com/artifact/org.dom4j/dom4j -->
<dependency>
<groupId>org.dom4j</groupId>
<artifactId>dom4j</artifactId>
<version>2.1.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/net.sourceforge.jregex/jregex -->
<dependency>
<groupId>net.sourceforge.jregex</groupId>
<artifactId>jregex</artifactId>
<version>1.2_01</version>
</dependency>
<!-- https://mvnrepository.com/artifact/oro/oro -->
<dependency>
<groupId>oro</groupId>
<artifactId>oro</artifactId>
<version>2.0.8</version>
</dependency>
</dependencies> </dependencies>
<properties> <properties>

Loading…
Cancel
Save