Skip to content

Commit 3b93970

Browse files
committed
fix
1 parent 79cec78 commit 3b93970

7 files changed

+99
-91
lines changed

.github/downloader

.github/organize_files.py

+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import os
2+
import re
3+
import yaml
4+
import shutil
5+
from urllib.parse import urlparse
6+
7+
def load_config():
    """Read and parse the URL-to-directory mapping file.

    Returns:
        Whatever ``yaml.safe_load`` produces for
        ``.github/url_dir_map_config.yml`` (the path is relative to the
        current working directory).
    """
    config_path = '.github/url_dir_map_config.yml'
    with open(config_path, 'r', encoding='utf-8') as config_file:
        parsed = yaml.safe_load(config_file)
    return parsed
10+
11+
def extract_original_url(content):
    """Pull the source URL out of a ``tcd_original_link`` HTML comment.

    Args:
        content: Full text of a markdown file.

    Returns:
        The text captured between ``tcd_original_link`` and the closing
        ``-->`` of the first such comment (surrounding whitespace removed),
        or None when the text contains no such comment.
    """
    marker = re.search(r'<!--\s*tcd_original_link\s+(.*?)\s*-->', content)
    if marker is None:
        return None
    return marker.group(1)
14+
15+
def get_target_dir(url, config):
    """Return the directory name configured for the site that *url* points to.

    Args:
        url: Original article URL, or None / empty when the file has none.
        config: Parsed mapping config; ``config['url_mappings']`` is a list
            of entries with ``domain`` and ``dir`` keys.

    Returns:
        The ``dir`` of the first entry whose ``domain`` occurs in the URL's
        host name on label boundaries. If nothing matches (or there is no
        usable host), the ``dir`` of the first ``"*"`` entry, and "未分类"
        when the config has no ``"*"`` entry.
    """
    wildcard_dir = None

    # ``hostname`` (unlike ``netloc``) drops userinfo and port and is
    # lower-cased, so "http://sina.com@evil.example/" is not taken for sina.
    host = (urlparse(url).hostname or "") if url else ""
    # Wrap in dots so a configured domain only matches whole labels:
    # "sina.com" still matches "bj.sina.com.cn", but "163.com" no longer
    # matches "abc163.com".
    dotted_host = f".{host}."

    for mapping in config['url_mappings']:
        domain = mapping['domain']
        if domain == "*":
            # Catch-all entry: it can never equal a host name, so remember
            # its directory as the fallback instead of trying to match it.
            if wildcard_dir is None:
                wildcard_dir = mapping['dir']
            continue
        if host and f".{domain.lower().strip('.')}." in dotted_host:
            return mapping['dir']

    return wildcard_dir if wildcard_dir is not None else "未分类"
24+
25+
def get_unique_filename(target_path, filename):
    """Build a path inside *target_path* that does not exist yet.

    Args:
        target_path: Directory the file is going to be placed in.
        filename: Preferred file name, extension included.

    Returns:
        ``target_path/filename`` when that path is free; otherwise the first
        free ``target_path/<base>_<n><ext>`` with n counting up from 1.
    """
    candidate = os.path.join(target_path, filename)
    if not os.path.exists(candidate):
        return candidate

    stem, suffix = os.path.splitext(filename)
    attempt = 1
    while True:
        candidate = os.path.join(target_path, f"{stem}_{attempt}{suffix}")
        if not os.path.exists(candidate):
            return candidate
        attempt += 1
36+
37+
def main():
    """Sort markdown files under ``workspace`` into per-site directories.

    Loads the mapping config, makes sure every configured directory exists
    under ``workspace``, then walks the tree. Each ``.md`` file that is not
    already inside a target directory is read, its ``tcd_original_link`` URL
    is mapped to a directory name, and the file is moved there under a
    non-clashing name.
    """
    config = load_config()
    workspace_dir = 'workspace'
    # Directory name get_target_dir falls back to when no mapping applies.
    fallback_dir = "未分类"

    # Create target directories if they don't exist
    for mapping in config['url_mappings']:
        dir_path = os.path.join(workspace_dir, mapping['dir'])
        os.makedirs(dir_path, exist_ok=True)

    # Every directory files can be moved into. The fallback is included even
    # if the config does not list it, so its contents are not re-moved (and
    # renamed with a _1, _2, ... suffix) on every run.
    target_names = {mapping['dir'] for mapping in config['url_mappings']}
    target_names.add(fallback_dir)
    target_roots = {
        os.path.normpath(os.path.join(workspace_dir, name))
        for name in target_names
    }

    def in_target_dir(path):
        # Compare directory paths rather than substring-matching the whole
        # file path, so a file that is merely *named* like a target
        # directory is still processed.
        path = os.path.normpath(path)
        return any(
            path == target or path.startswith(target + os.sep)
            for target in target_roots
        )

    # Process markdown files
    for root, dirs, files in os.walk(workspace_dir):
        # Skip files that are already in target directories
        if in_target_dir(root):
            dirs[:] = []  # prune: do not descend into already-sorted trees
            continue

        for file in files:
            if not file.endswith('.md'):
                continue

            file_path = os.path.join(root, file)

            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            url = extract_original_url(content)
            target_dir = get_target_dir(url, config)
            target_dir_path = os.path.join(workspace_dir, target_dir)

            # Create target directory if it doesn't exist
            os.makedirs(target_dir_path, exist_ok=True)

            # Get unique filename in target directory
            new_file_path = get_unique_filename(target_dir_path, file)

            # Move file
            shutil.move(file_path, new_file_path)
            print(f"Moved {file} to {target_dir}")
74+
75+
# Run the organizer only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

.github/record/2024-12-24/downloads/page.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,7 @@ www_特朗普称上任第一天就要阻止_“变性妄想”.html:
489489
title: 特朗普称上任第一天就要阻止 “变性妄想”
490490
snippet: 这位将在1月20日就职的美国总统称:“我将签署行政命令,结束切割儿童生殖器的做法,将跨性别者排除在军队之外,并将他们排除在小学、初中和高中之外”。
491491
visited_date: '2025-01-12 18:20:42'
492-
www_12岁时被“快速”接受性别转换_加州医生遭起诉(图).html:
492+
www_12岁时被“快速”接受性别转换_加州医生遭起诉_图_.html:
493493
link: https://www.secretchina.com/news/gb/2024/12/10/1074181.html
494494
md5: 3f3e6d895f11891233c0dd15848970dc
495495
title: 12岁时被“快速”接受性别转换 加州医生遭起诉(图)

.github/url_dir_map_config.yml

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
url_mappings:
2+
- domain: "sina.com"
3+
dir: "新浪新闻"
4+
- domain: "sina.cn"
5+
dir: "新浪新闻"
6+
- domain: "news.qq.com"
7+
dir: "腾讯新闻"
8+
- domain: "163.com"
9+
dir: "网易新闻"
10+
- domain: "thepaper.cn"
11+
dir: "澎湃新闻"
12+
- domain: "sohu.com"
13+
dir: "搜狐新闻"
14+
- domain: "ifeng.com"
15+
dir: "凤凰网"
16+
- domain: "chinanews.com"
17+
dir: "中国新闻网"
18+
- domain: "*"
19+
dir: "未分类"

新浪新闻/bj_我与泰国人妖的3次亲密接触_组图__新浪生活.md

+2
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@
4747

4848
版权 © 1996 - 2006 SINA Corporation, All Rights Reserved.
4949

50+
<!-- tcd_original_link http://bj.sina.com.cn/t/2006-07-26/104298745.shtml -->
51+
5052
## 摘要与附加信息
5153

5254
<!-- tcd_abstract -->

未分类/news_2015:当青春撞见新时代.md

-89
This file was deleted.

0 commit comments

Comments
 (0)