parent
0904df44e4
commit
b78577fc28
@ -1,4 +0,0 @@ |
|||||||
package cn.linghang.mywust.core.exception; |
|
||||||
|
|
||||||
public class HtmlPageParseException extends BasicException { |
|
||||||
} |
|
@ -0,0 +1,4 @@ |
|||||||
|
package cn.linghang.mywust.core.exception; |
||||||
|
|
||||||
|
public class ParseException extends BasicException { |
||||||
|
} |
@ -0,0 +1,55 @@ |
|||||||
|
package cn.linghang.mywust.core.parser; |
||||||
|
|
||||||
|
import cn.linghang.mywust.core.exception.ParseException; |
||||||
|
import cn.linghang.mywust.model.global.ClassRoom; |
||||||
|
import org.slf4j.Logger; |
||||||
|
import org.slf4j.LoggerFactory; |
||||||
|
|
||||||
|
import java.util.regex.Matcher; |
||||||
|
import java.util.regex.Pattern; |
||||||
|
|
||||||
|
/** |
||||||
|
* <p>黄家湖校区教室编号解析(如12804(恒大楼二区804),30104(教三楼104), 11B304(教11B区304)这种)</p> |
||||||
|
* <p>具体的教学楼名称和区域不作生成,只对相应的字段结构进行拆分解析,由调用者自行决定具体名称</p> |
||||||
|
* |
||||||
|
* @author lensfrex |
||||||
|
* @create 2022-10-26 08:56 |
||||||
|
*/ |
||||||
|
public class HuangjiahuClassroomNameParser implements Parser<ClassRoom> { |
||||||
|
private static final Logger log = LoggerFactory.getLogger(HuangjiahuClassroomNameParser.class); |
||||||
|
|
||||||
|
private static final Pattern CLASSROOM_PATTERN = Pattern.compile("(?<buildingId>\\d)(?<areaId>\\d)(?<room>\\d{3})"); |
||||||
|
|
||||||
|
private static final Pattern BUILDING_11_CLASSROOM_PATTERN = Pattern.compile("11(?<areaId>[A-C])(?<room>\\d{3})"); |
||||||
|
|
||||||
|
@Override |
||||||
|
public ClassRoom parse(String classroomName) throws ParseException { |
||||||
|
ClassRoom classRoom = ClassRoom.builder().campus("黄家湖").build(); |
||||||
|
try { |
||||||
|
Matcher matcher = CLASSROOM_PATTERN.matcher(classroomName); |
||||||
|
// 不匹配普通教学楼正则的多半就是教11的教室
|
||||||
|
if (matcher.find()) { |
||||||
|
classRoom.setBuilding(matcher.group("buildingId")); |
||||||
|
classRoom.setArea(matcher.group("areaId")); |
||||||
|
classRoom.setRoom(matcher.group("room")); |
||||||
|
} else { |
||||||
|
matcher = BUILDING_11_CLASSROOM_PATTERN.matcher(classroomName); |
||||||
|
if (matcher.find()) { |
||||||
|
classRoom.setBuilding("11"); |
||||||
|
classRoom.setArea(matcher.group("areaId")); |
||||||
|
classRoom.setRoom(matcher.group("room")); |
||||||
|
} else { |
||||||
|
// 解析都不匹配就直接用传进来的编号作为教室
|
||||||
|
classRoom.setBuilding("未知"); |
||||||
|
classRoom.setArea("未知"); |
||||||
|
classRoom.setRoom(classroomName); |
||||||
|
} |
||||||
|
} |
||||||
|
} catch (Exception e) { |
||||||
|
log.warn("解析教室编号失败,教室:{}", classroomName); |
||||||
|
throw new ParseException(); |
||||||
|
} |
||||||
|
|
||||||
|
return classRoom; |
||||||
|
} |
||||||
|
} |
@ -1,7 +1,7 @@ |
|||||||
package cn.linghang.mywust.core.parser; |
package cn.linghang.mywust.core.parser; |
||||||
|
|
||||||
import cn.linghang.mywust.core.exception.HtmlPageParseException; |
import cn.linghang.mywust.core.exception.ParseException; |
||||||
|
|
||||||
public interface Parser<T> { |
public interface Parser<T> { |
||||||
public T parse(String html) throws HtmlPageParseException; |
public T parse(String html) throws ParseException; |
||||||
} |
} |
||||||
|
@ -0,0 +1,85 @@ |
|||||||
|
package cn.linghang.mywust.core.parser.physics; |
||||||
|
|
||||||
|
import cn.linghang.mywust.core.exception.ParseException; |
||||||
|
import cn.linghang.mywust.core.parser.HuangjiahuClassroomNameParser; |
||||||
|
import cn.linghang.mywust.core.parser.Parser; |
||||||
|
import cn.linghang.mywust.core.parser.physics.xpath.PhysicsCourseXpath; |
||||||
|
import cn.linghang.mywust.model.PhysicsCourse; |
||||||
|
import cn.linghang.mywust.model.global.ClassRoom; |
||||||
|
import cn.linghang.mywust.model.global.Course; |
||||||
|
import org.jsoup.Jsoup; |
||||||
|
import org.jsoup.select.Elements; |
||||||
|
|
||||||
|
import java.util.ArrayList; |
||||||
|
import java.util.List; |
||||||
|
import java.util.regex.Matcher; |
||||||
|
import java.util.regex.Pattern; |
||||||
|
|
||||||
|
public class PhysicsCoursePageParser implements Parser<List<PhysicsCourse>> { |
||||||
|
private static final HuangjiahuClassroomNameParser HUANGJIAHU_CLASSROOM_NAME_PARSER = new HuangjiahuClassroomNameParser(); |
||||||
|
|
||||||
|
// 第几周
|
||||||
|
private static final Pattern PHYSICS_COURSE_WEEK_PATTERN = Pattern.compile("第(?<week>.*?)周"); |
||||||
|
|
||||||
|
// 星期
|
||||||
|
private static final Pattern PHYSICS_COURSE_WEEKDAY_PATTERN = Pattern.compile("星期(?<weekDay>[一二三四五六七日天]?)"); |
||||||
|
|
||||||
|
// 开始和结束时间
|
||||||
|
private static final Pattern PHYSICS_COURSE_START_END_PATTERN = Pattern.compile("(?<start>\\d+?)[&\\-|~至](?<end>\\d+)节"); |
||||||
|
|
||||||
|
// 日期时间
|
||||||
|
private static final Pattern PHYSICS_COURSE_DATE_PATTERN = Pattern.compile("(?<date>\\d{4}-\\d{2}-\\d{2}?)"); |
||||||
|
|
||||||
|
@Override |
||||||
|
public List<PhysicsCourse> parse(String html) throws ParseException { |
||||||
|
Elements courseElements = Jsoup.parse(html).selectXpath(PhysicsCourseXpath.COURSE_ROWS_XPATH); |
||||||
|
if (courseElements.isEmpty()) { |
||||||
|
throw new ParseException(); |
||||||
|
} |
||||||
|
|
||||||
|
List<PhysicsCourse> courses = new ArrayList<>(courseElements.size()); |
||||||
|
|
||||||
|
// 从1开始,跳过表头
|
||||||
|
for (int i = 1; i < courseElements.size(); i++) { |
||||||
|
Elements columnContextElements = courseElements.get(i).getElementsByTag("td"); |
||||||
|
PhysicsCourse course = new PhysicsCourse(); |
||||||
|
|
||||||
|
// 这里的代码硬编码了,不是很规范,抱歉
|
||||||
|
course.setName(columnContextElements.get(1).text()); |
||||||
|
course.setTeacher(columnContextElements.get(3).text().replace('\uE863', '䶮')); |
||||||
|
|
||||||
|
String classroomNumber = columnContextElements.get(5).text(); |
||||||
|
ClassRoom classRoom = HUANGJIAHU_CLASSROOM_NAME_PARSER.parse(classroomNumber); |
||||||
|
course.setClassroom(classRoom); |
||||||
|
|
||||||
|
String time = columnContextElements.get(4).text(); |
||||||
|
|
||||||
|
Matcher weekMatcher = PHYSICS_COURSE_WEEK_PATTERN.matcher(time); |
||||||
|
if (weekMatcher.find()) { |
||||||
|
// 物理实验,一个只有一节(一周),所以开始周和结束周是一样的
|
||||||
|
course.setStartWeek(Integer.parseInt(weekMatcher.group("week"))); |
||||||
|
course.setEndWeek(course.getStartWeek()); |
||||||
|
} |
||||||
|
|
||||||
|
Matcher weekDayMatcher = PHYSICS_COURSE_WEEKDAY_PATTERN.matcher(time); |
||||||
|
if (weekDayMatcher.find()) { |
||||||
|
course.setWeekDay(Course.WEEKDAY_MAP.getOrDefault(weekDayMatcher.group("weekDay"), 1)); |
||||||
|
} |
||||||
|
|
||||||
|
Matcher startEndMatcher = PHYSICS_COURSE_START_END_PATTERN.matcher(time); |
||||||
|
if (startEndMatcher.find()) { |
||||||
|
course.setStart(Integer.parseInt(startEndMatcher.group("start"))); |
||||||
|
course.setEnd(Integer.parseInt(startEndMatcher.group("end"))); |
||||||
|
} |
||||||
|
|
||||||
|
Matcher dateMatcher = PHYSICS_COURSE_DATE_PATTERN.matcher(time); |
||||||
|
if (dateMatcher.find()) { |
||||||
|
course.setDate(dateMatcher.group("date")); |
||||||
|
} |
||||||
|
|
||||||
|
courses.add(course); |
||||||
|
} |
||||||
|
|
||||||
|
return courses; |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,13 @@ |
|||||||
|
package cn.linghang.mywust.core.parser.physics.xpath; |
||||||
|
|
||||||
|
/**
 * XPath expressions used to locate data in the physics course page.
 *
 * <p>Pure constants holder: it cannot be instantiated or subclassed.</p>
 */
public final class PhysicsCourseXpath {
    /**
     * XPath selecting every row of the course table.
     */
    public static final String COURSE_ROWS_XPATH = "//*[@id=\"ID_PEE110301_gvpee120101\"]/tbody/tr";

    // The per-column expressions below are currently empty strings.
    public static final String COURSE_ROW_NAME_XPATH = "";
    public static final String COURSE_ROW_TEACHER_XPATH = "";
    public static final String COURSE_ROW_TIME_XPATH = "";
    public static final String COURSE_ROW_CLASSROOM_XPATH = "";

    private PhysicsCourseXpath() {
        // Constants only; no instances.
    }
}
@ -0,0 +1,11 @@ |
|||||||
|
package cn.linghang.mywust.model; |
||||||
|
|
||||||
|
import cn.linghang.mywust.model.global.Course; |
||||||
|
import lombok.Getter; |
||||||
|
import lombok.Setter; |
||||||
|
|
||||||
|
@Getter |
||||||
|
@Setter |
||||||
|
public class PhysicsCourse extends Course { |
||||||
|
private String date; |
||||||
|
} |
@ -0,0 +1,32 @@ |
|||||||
|
package cn.linghang.mywust.model.global; |
||||||
|
|
||||||
|
import lombok.AllArgsConstructor; |
||||||
|
import lombok.Builder; |
||||||
|
import lombok.Data; |
||||||
|
import lombok.NoArgsConstructor; |
||||||
|
|
||||||
|
@Data |
||||||
|
@Builder |
||||||
|
@AllArgsConstructor |
||||||
|
@NoArgsConstructor |
||||||
|
public class ClassRoom { |
||||||
|
/** |
||||||
|
* 校区,黄家湖或青山 |
||||||
|
*/ |
||||||
|
private String campus; |
||||||
|
|
||||||
|
/** |
||||||
|
* 教学楼编号,如1, 2, 3, 11,对应教1楼(恒大楼),教2楼(理学院),教3楼(计院),教11 |
||||||
|
*/ |
||||||
|
private String building; |
||||||
|
|
||||||
|
/** |
||||||
|
* 教学楼区域编号,如0,1,A,B等,对应0区(没有分区),1区,A区(教11) |
||||||
|
*/ |
||||||
|
private String area; |
||||||
|
|
||||||
|
/** |
||||||
|
* 教室名,如301,208,802 |
||||||
|
*/ |
||||||
|
private String room; |
||||||
|
} |
@ -0,0 +1,75 @@ |
|||||||
|
package cn.linghang.mywust.model.global; |
||||||
|
|
||||||
|
import lombok.AllArgsConstructor; |
||||||
|
import lombok.Builder; |
||||||
|
import lombok.Data; |
||||||
|
import lombok.NoArgsConstructor; |
||||||
|
|
||||||
|
import java.util.HashMap; |
||||||
|
import java.util.Map; |
||||||
|
|
||||||
|
@Data |
||||||
|
@Builder |
||||||
|
@AllArgsConstructor |
||||||
|
@NoArgsConstructor |
||||||
|
public class Course { |
||||||
|
/** |
||||||
|
* 课程名称 |
||||||
|
*/ |
||||||
|
private String name; |
||||||
|
|
||||||
|
/** |
||||||
|
* 教师名称 |
||||||
|
*/ |
||||||
|
private String teacher; |
||||||
|
|
||||||
|
/** |
||||||
|
* 教学班 |
||||||
|
*/ |
||||||
|
private String teachClass; |
||||||
|
|
||||||
|
/** |
||||||
|
* 开始周 |
||||||
|
*/ |
||||||
|
private int startWeek; |
||||||
|
|
||||||
|
/** |
||||||
|
* 结束周 |
||||||
|
*/ |
||||||
|
private int endWeek; |
||||||
|
|
||||||
|
/** |
||||||
|
* 星期几 |
||||||
|
*/ |
||||||
|
private int weekDay; |
||||||
|
|
||||||
|
/** |
||||||
|
* 开始时间 |
||||||
|
*/ |
||||||
|
private int start; |
||||||
|
|
||||||
|
/** |
||||||
|
* 结束时间 |
||||||
|
*/ |
||||||
|
private int end; |
||||||
|
|
||||||
|
private ClassRoom classroom; |
||||||
|
|
||||||
|
public static final Map<String, Integer> WEEKDAY_MAP = makeWeekdayMap(); |
||||||
|
|
||||||
|
private static Map<String, Integer> makeWeekdayMap() { |
||||||
|
HashMap<String, Integer> map = new HashMap<>(7 + 2); |
||||||
|
|
||||||
|
map.put("一", 1); |
||||||
|
map.put("二", 2); |
||||||
|
map.put("三", 3); |
||||||
|
map.put("四", 4); |
||||||
|
map.put("五", 5); |
||||||
|
map.put("六", 6); |
||||||
|
map.put("七", 7); |
||||||
|
map.put("日", 7); |
||||||
|
map.put("天", 7); |
||||||
|
|
||||||
|
return map; |
||||||
|
} |
||||||
|
} |
Loading…
Reference in new issue