parent
0904df44e4
commit
b78577fc28
@ -1,4 +0,0 @@ |
||||
package cn.linghang.mywust.core.exception; |
||||
|
||||
public class HtmlPageParseException extends BasicException { |
||||
} |
@ -0,0 +1,4 @@ |
||||
package cn.linghang.mywust.core.exception; |
||||
|
||||
public class ParseException extends BasicException { |
||||
} |
@ -0,0 +1,55 @@ |
||||
package cn.linghang.mywust.core.parser; |
||||
|
||||
import cn.linghang.mywust.core.exception.ParseException; |
||||
import cn.linghang.mywust.model.global.ClassRoom; |
||||
import org.slf4j.Logger; |
||||
import org.slf4j.LoggerFactory; |
||||
|
||||
import java.util.regex.Matcher; |
||||
import java.util.regex.Pattern; |
||||
|
||||
/** |
||||
* <p>黄家湖校区教室编号解析(如12804(恒大楼二区804),30104(教三楼104), 11B304(教11B区304)这种)</p> |
||||
* <p>具体的教学楼名称和区域不作生成,只对相应的字段结构进行拆分解析,由调用者自行决定具体名称</p> |
||||
* |
||||
* @author lensfrex |
||||
* @create 2022-10-26 08:56 |
||||
*/ |
||||
public class HuangjiahuClassroomNameParser implements Parser<ClassRoom> { |
||||
private static final Logger log = LoggerFactory.getLogger(HuangjiahuClassroomNameParser.class); |
||||
|
||||
private static final Pattern CLASSROOM_PATTERN = Pattern.compile("(?<buildingId>\\d)(?<areaId>\\d)(?<room>\\d{3})"); |
||||
|
||||
private static final Pattern BUILDING_11_CLASSROOM_PATTERN = Pattern.compile("11(?<areaId>[A-C])(?<room>\\d{3})"); |
||||
|
||||
@Override |
||||
public ClassRoom parse(String classroomName) throws ParseException { |
||||
ClassRoom classRoom = ClassRoom.builder().campus("黄家湖").build(); |
||||
try { |
||||
Matcher matcher = CLASSROOM_PATTERN.matcher(classroomName); |
||||
// 不匹配普通教学楼正则的多半就是教11的教室
|
||||
if (matcher.find()) { |
||||
classRoom.setBuilding(matcher.group("buildingId")); |
||||
classRoom.setArea(matcher.group("areaId")); |
||||
classRoom.setRoom(matcher.group("room")); |
||||
} else { |
||||
matcher = BUILDING_11_CLASSROOM_PATTERN.matcher(classroomName); |
||||
if (matcher.find()) { |
||||
classRoom.setBuilding("11"); |
||||
classRoom.setArea(matcher.group("areaId")); |
||||
classRoom.setRoom(matcher.group("room")); |
||||
} else { |
||||
// 解析都不匹配就直接用传进来的编号作为教室
|
||||
classRoom.setBuilding("未知"); |
||||
classRoom.setArea("未知"); |
||||
classRoom.setRoom(classroomName); |
||||
} |
||||
} |
||||
} catch (Exception e) { |
||||
log.warn("解析教室编号失败,教室:{}", classroomName); |
||||
throw new ParseException(); |
||||
} |
||||
|
||||
return classRoom; |
||||
} |
||||
} |
@ -1,7 +1,7 @@ |
||||
package cn.linghang.mywust.core.parser; |
||||
|
||||
import cn.linghang.mywust.core.exception.HtmlPageParseException; |
||||
import cn.linghang.mywust.core.exception.ParseException; |
||||
|
||||
public interface Parser<T> { |
||||
public T parse(String html) throws HtmlPageParseException; |
||||
public T parse(String html) throws ParseException; |
||||
} |
||||
|
@ -0,0 +1,85 @@ |
||||
package cn.linghang.mywust.core.parser.physics; |
||||
|
||||
import cn.linghang.mywust.core.exception.ParseException; |
||||
import cn.linghang.mywust.core.parser.HuangjiahuClassroomNameParser; |
||||
import cn.linghang.mywust.core.parser.Parser; |
||||
import cn.linghang.mywust.core.parser.physics.xpath.PhysicsCourseXpath; |
||||
import cn.linghang.mywust.model.PhysicsCourse; |
||||
import cn.linghang.mywust.model.global.ClassRoom; |
||||
import cn.linghang.mywust.model.global.Course; |
||||
import org.jsoup.Jsoup; |
||||
import org.jsoup.select.Elements; |
||||
|
||||
import java.util.ArrayList; |
||||
import java.util.List; |
||||
import java.util.regex.Matcher; |
||||
import java.util.regex.Pattern; |
||||
|
||||
public class PhysicsCoursePageParser implements Parser<List<PhysicsCourse>> { |
||||
private static final HuangjiahuClassroomNameParser HUANGJIAHU_CLASSROOM_NAME_PARSER = new HuangjiahuClassroomNameParser(); |
||||
|
||||
// 第几周
|
||||
private static final Pattern PHYSICS_COURSE_WEEK_PATTERN = Pattern.compile("第(?<week>.*?)周"); |
||||
|
||||
// 星期
|
||||
private static final Pattern PHYSICS_COURSE_WEEKDAY_PATTERN = Pattern.compile("星期(?<weekDay>[一二三四五六七日天]?)"); |
||||
|
||||
// 开始和结束时间
|
||||
private static final Pattern PHYSICS_COURSE_START_END_PATTERN = Pattern.compile("(?<start>\\d+?)[&\\-|~至](?<end>\\d+)节"); |
||||
|
||||
// 日期时间
|
||||
private static final Pattern PHYSICS_COURSE_DATE_PATTERN = Pattern.compile("(?<date>\\d{4}-\\d{2}-\\d{2}?)"); |
||||
|
||||
@Override |
||||
public List<PhysicsCourse> parse(String html) throws ParseException { |
||||
Elements courseElements = Jsoup.parse(html).selectXpath(PhysicsCourseXpath.COURSE_ROWS_XPATH); |
||||
if (courseElements.isEmpty()) { |
||||
throw new ParseException(); |
||||
} |
||||
|
||||
List<PhysicsCourse> courses = new ArrayList<>(courseElements.size()); |
||||
|
||||
// 从1开始,跳过表头
|
||||
for (int i = 1; i < courseElements.size(); i++) { |
||||
Elements columnContextElements = courseElements.get(i).getElementsByTag("td"); |
||||
PhysicsCourse course = new PhysicsCourse(); |
||||
|
||||
// 这里的代码硬编码了,不是很规范,抱歉
|
||||
course.setName(columnContextElements.get(1).text()); |
||||
course.setTeacher(columnContextElements.get(3).text().replace('\uE863', '䶮')); |
||||
|
||||
String classroomNumber = columnContextElements.get(5).text(); |
||||
ClassRoom classRoom = HUANGJIAHU_CLASSROOM_NAME_PARSER.parse(classroomNumber); |
||||
course.setClassroom(classRoom); |
||||
|
||||
String time = columnContextElements.get(4).text(); |
||||
|
||||
Matcher weekMatcher = PHYSICS_COURSE_WEEK_PATTERN.matcher(time); |
||||
if (weekMatcher.find()) { |
||||
// 物理实验,一个只有一节(一周),所以开始周和结束周是一样的
|
||||
course.setStartWeek(Integer.parseInt(weekMatcher.group("week"))); |
||||
course.setEndWeek(course.getStartWeek()); |
||||
} |
||||
|
||||
Matcher weekDayMatcher = PHYSICS_COURSE_WEEKDAY_PATTERN.matcher(time); |
||||
if (weekDayMatcher.find()) { |
||||
course.setWeekDay(Course.WEEKDAY_MAP.getOrDefault(weekDayMatcher.group("weekDay"), 1)); |
||||
} |
||||
|
||||
Matcher startEndMatcher = PHYSICS_COURSE_START_END_PATTERN.matcher(time); |
||||
if (startEndMatcher.find()) { |
||||
course.setStart(Integer.parseInt(startEndMatcher.group("start"))); |
||||
course.setEnd(Integer.parseInt(startEndMatcher.group("end"))); |
||||
} |
||||
|
||||
Matcher dateMatcher = PHYSICS_COURSE_DATE_PATTERN.matcher(time); |
||||
if (dateMatcher.find()) { |
||||
course.setDate(dateMatcher.group("date")); |
||||
} |
||||
|
||||
courses.add(course); |
||||
} |
||||
|
||||
return courses; |
||||
} |
||||
} |
@ -0,0 +1,13 @@ |
||||
package cn.linghang.mywust.core.parser.physics.xpath; |
||||
|
||||
/**
 * XPath expressions used when reading the physics course page.
 *
 * <p>This type only holds constants, so it is final and cannot be instantiated.</p>
 */
public final class PhysicsCourseXpath {
    /**
     * XPath selecting every row of the course table.
     */
    public static final String COURSE_ROWS_XPATH = "//*[@id=\"ID_PEE110301_gvpee120101\"]/tbody/tr";

    // The per-cell expressions below are currently empty strings.
    public static final String COURSE_ROW_NAME_XPATH = "";
    public static final String COURSE_ROW_TEACHER_XPATH = "";
    public static final String COURSE_ROW_TIME_XPATH = "";
    public static final String COURSE_ROW_CLASSROOM_XPATH = "";

    private PhysicsCourseXpath() {
        // constants holder, not meant to be instantiated
    }
}
@ -0,0 +1,11 @@ |
||||
package cn.linghang.mywust.model; |
||||
|
||||
import cn.linghang.mywust.model.global.Course; |
||||
import lombok.Getter; |
||||
import lombok.Setter; |
||||
|
||||
@Getter |
||||
@Setter |
||||
public class PhysicsCourse extends Course { |
||||
private String date; |
||||
} |
@ -0,0 +1,32 @@ |
||||
package cn.linghang.mywust.model.global; |
||||
|
||||
import lombok.AllArgsConstructor; |
||||
import lombok.Builder; |
||||
import lombok.Data; |
||||
import lombok.NoArgsConstructor; |
||||
|
||||
@Data |
||||
@Builder |
||||
@AllArgsConstructor |
||||
@NoArgsConstructor |
||||
public class ClassRoom { |
||||
/** |
||||
* 校区,黄家湖或青山 |
||||
*/ |
||||
private String campus; |
||||
|
||||
/** |
||||
* 教学楼编号,如1, 2, 3, 11,对应教1楼(恒大楼),教2楼(理学院),教3楼(计院),教11 |
||||
*/ |
||||
private String building; |
||||
|
||||
/** |
||||
* 教学楼区域编号,如0,1,A,B等,对应0区(没有分区),1区,A区(教11) |
||||
*/ |
||||
private String area; |
||||
|
||||
/** |
||||
* 教室名,如301,208,802 |
||||
*/ |
||||
private String room; |
||||
} |
@ -0,0 +1,75 @@ |
||||
package cn.linghang.mywust.model.global; |
||||
|
||||
import lombok.AllArgsConstructor; |
||||
import lombok.Builder; |
||||
import lombok.Data; |
||||
import lombok.NoArgsConstructor; |
||||
|
||||
import java.util.HashMap; |
||||
import java.util.Map; |
||||
|
||||
@Data |
||||
@Builder |
||||
@AllArgsConstructor |
||||
@NoArgsConstructor |
||||
public class Course { |
||||
/** |
||||
* 课程名称 |
||||
*/ |
||||
private String name; |
||||
|
||||
/** |
||||
* 教师名称 |
||||
*/ |
||||
private String teacher; |
||||
|
||||
/** |
||||
* 教学班 |
||||
*/ |
||||
private String teachClass; |
||||
|
||||
/** |
||||
* 开始周 |
||||
*/ |
||||
private int startWeek; |
||||
|
||||
/** |
||||
* 结束周 |
||||
*/ |
||||
private int endWeek; |
||||
|
||||
/** |
||||
* 星期几 |
||||
*/ |
||||
private int weekDay; |
||||
|
||||
/** |
||||
* 开始时间 |
||||
*/ |
||||
private int start; |
||||
|
||||
/** |
||||
* 结束时间 |
||||
*/ |
||||
private int end; |
||||
|
||||
private ClassRoom classroom; |
||||
|
||||
public static final Map<String, Integer> WEEKDAY_MAP = makeWeekdayMap(); |
||||
|
||||
private static Map<String, Integer> makeWeekdayMap() { |
||||
HashMap<String, Integer> map = new HashMap<>(7 + 2); |
||||
|
||||
map.put("一", 1); |
||||
map.put("二", 2); |
||||
map.put("三", 3); |
||||
map.put("四", 4); |
||||
map.put("五", 5); |
||||
map.put("六", 6); |
||||
map.put("七", 7); |
||||
map.put("日", 7); |
||||
map.put("天", 7); |
||||
|
||||
return map; |
||||
} |
||||
} |
Loading…
Reference in new issue