html2text修改图片存储
Whisper Lv4

Modify the html2text (HTML-to-Markdown) source code to change the location and format of the image links generated from HTML.

old version

Comment out the first line, add the second line, and change the image format to ![](images/image-name)

new version 2020.1.16

Add the extra code to __init__.py:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
def __init__(
    self,
    img_path_name,  # added parameter
    imgage_month_path,  # added parameter
    out: Optional[OutCallback] = None,
    baseurl: str = "",
    bodywidth: int = config.BODY_WIDTH,
) -> None:
    """Initialise the converter with two extra image-path settings.

    NOTE: the two added parameters are required and come before ``out``,
    so any existing caller that passes ``out``/``baseurl`` positionally
    must be updated.

    Args:
        img_path_name: stored on ``self.img_path_name``; used as the second
            path component of rewritten image links
            (``images/<imgage_month_path>/<img_path_name>/<file>``).
            Presumably a string -- confirm against the caller.
        imgage_month_path: stored on ``self.imgage_month_path``; used as the
            first path component of rewritten image links. Presumably a
            string -- confirm against the caller.
        out: optional output callback.
        baseurl: base URL, empty by default.
        bodywidth: body width, defaults to ``config.BODY_WIDTH``.
    """
    super().__init__(convert_charrefs=False)
    # Added lines. Do NOT put a trailing comma after these assignments,
    # otherwise the stored value becomes a one-element tuple.
    self.imgage_month_path = imgage_month_path
    self.img_path_name = img_path_name
    self.split_next_td = False
    self.td_count = 0
    self.table_start = False
    # (Excerpt: the rest of the original constructor presumably follows
    # unchanged -- verify against the html2text source.)

modify image format

1
2
3
4
5
6
7
8
9
10
11
# Handle an opening <img> tag: point its link target at a local
# images/<imgage_month_path>/<img_path_name>/<file name> path.
# NOTE(review): the nesting below was reconstructed from a snippet whose
# indentation was lost -- verify it against the html2text source.
if tag == "img" and start and not self.ignore_images:
    if "src" in attrs:
        assert attrs["src"] is not None
        if not self.images_to_alt:
            src = attrs["src"]
            # Keep only the text after the last "/" of the original src.
            img_name = src.split("/")[-1]
            # Added line: build the rewritten link from the two settings
            # stored on the instance plus the image file name.
            attrs["href"] = "images/{0}/{1}/{2}".format(
                self.imgage_month_path,
                self.img_path_name,
                img_name,
            )
        alt = attrs.get("alt") or self.default_image_alt

final image format:

1
![](images/2023/xxx.png)